diff options
author | no author <noone@nowhere> | 2005-09-26 02:58:54 +0000 |
---|---|---|
committer | no author <noone@nowhere> | 2005-09-26 02:58:54 +0000 |
commit | 84b8431608174e74a4c0d2394eb330a6621bc74b (patch) | |
tree | ffc2bd7ce21708a9147247c80b0e7fc7728ea063 /lib | |
download | coderay-84b8431608174e74a4c0d2394eb330a6621bc74b.tar.gz |
New Repository, initial import
Diffstat (limited to 'lib')
27 files changed, 3515 insertions, 0 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb new file mode 100644 index 0000000..17c315d --- /dev/null +++ b/lib/coderay.rb @@ -0,0 +1,169 @@ +# = CodeRay +# +# CodeRay is a Ruby library for syntax highlighting. +# +# I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete, +# fast and efficient. +# +# See README. +# +# It consists mainly of +# * the main engine: CodeRay, CodeRay::Scanner, CodeRay::Tokens, CodeRay::TokenStream, CodeRay::Encoder +# * the scanners in CodeRay::Scanners +# * the encoders in CodeRay::Encoders +# +# Here's a fancy graphic to light up this gray docu: +# +# http://rd.cYcnus.de/coderay/scheme.png +# +# == Documentation +# +# See CodeRay, Encoders, Scanners, Tokens. +# +# == Usage +# +# Remember you need RubyGems to use CodeRay. Run Ruby with -rubygems option +# if required. +# +# === Highlight Ruby code in a string as html +# +# require 'coderay' +# print CodeRay.scan('puts "Hello, world!"', :ruby).compact.html.page +# +# # prints something like this: +# puts <span class="s">"Hello, world!"</span> +# +# +# === Highlight C code from a file in a html div +# +# require 'coderay' +# print CodeRay.scan(File.read('ruby.h'), :c).html.div +# # print CodeRay.scan_file('ruby.h').html.div ## not working yet +# +# You can include this div in your page. The used CSS styles can be printed with +# +# % ruby -rcoderay -e "print CodeRay::Encoders[:html]::CSS" +# +# === Highlight without typing too much +# +# If you are one of the hasty (or lazy, or extremely curious) people, just run this file: +# +# % ruby -rubygems coderay.rb +# +# If the output was to fast for you, try +# +# % ruby -rubygems coderay.rb > example.html +# +# and look at the file it created. +# +module CodeRay + + Version = '0.4.2' + + require 'coderay/tokens' + require 'coderay/scanner' + require 'coderay/encoder' + + + class << self + + # Scans the given +code+ (a String) with the Scanner for +lang+. + # + # This is a simple way to use CodeRay. 
Example: + # require 'coderay' + # page = CodeRay.scan("puts 'Hello, world!'", :ruby).html + # + # See also demo/demo_simple. + def scan code, lang, options = {}, &block + scanner = Scanners[lang].new code, options, &block + scanner.tokenize + end + + # Scans +filename+ (a path to a code file) with the Scanner for +lang+. + # + # If +lang+ is :auto or omitted, the CodeRay::FileType module is used to + # determine it. If it cannot find out what type it is, it uses CodeRay::Scanners::Plaintext. + # + # Calls CodeRay.scan. + # + # Example: + # require 'coderay' + # page = CodeRay.scan_file('some_c_code.c').html + def scan_file filename, lang = :auto, options = {}, &block + file = IO.read filename + if lang == :auto + require 'coderay/helpers/filetype' + lang = FileType.fetch filename, :plaintext, true + end + scan file, lang, options = {}, &block + end + + # Scan the +code+ (a string) with the scanner for +lang+. + # + # Calls scan. + # + # See CodeRay.scan. + def scan_stream code, lang, options = {}, &block + options[:stream] = true + scan code, lang, options, &block + end + + # Encode +code+ with the Encoder for +format+ and the Scanner for +lang+. + # +options+ will be passed to the Encoder. + # + # See CodeRay::Encoder.encode_stream + def encode_stream code, lang, format, options = {} + encoder(format, options).encode_stream code, lang, options + end + + def encode code, lang, format, options = {} + encoder(format, options).encode code, lang, options + end + + # Finds the Encoder class for +format+ and creates an instance, passing + # +options+ to it. + # + # Example: + # require 'coderay' + # token_count = CodeRay.encoder(:count).encodea("puts 17 + 4\n", :ruby).to_i #-> 8 + # require 'coderay' + # + # stats = CodeRay.encoder(:statistic) + # stats.encode("puts 17 + 4\n", :ruby) + # + # puts '%d out of %d tokens have the kind :integer.' % [ + # stats.type_stats[:integer].count, + # stats.real_token_count + # ] + # #-> 2 out of 4 tokens have the kind :integer. 
+ def encoder format, options = {} + Encoders[format].new options + end + + end + + # This Exception is raised when you try to stream with something that is not + # capable of streaming. + class NotStreamableError < Exception + def initialize obj + @obj = obj + end + + def to_s + '%s is not Streamable!' % @obj.class + end + end + + # A dummy module that is included by subclasses of CodeRay::Scanner an CodeRay::Encoder + # to show that they are able to handle streams. + module Streamable + end + +end + +# Run a test script. +if $0 == __FILE__ + $stderr.print 'Press key to print demo.'; gets + code = File.read($0)[/module CodeRay.*/m] + print CodeRay.scan(code, :ruby).html +end diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb new file mode 100644 index 0000000..5f6d511 --- /dev/null +++ b/lib/coderay/encoder.rb @@ -0,0 +1,210 @@ +module CodeRay
+
+ # This module holds class Encoder and its subclasses.
+ # For example, the HTML encoder is named CodeRay::Encoders::HTML
+ # and can be found in coderay/encoders/html.
+ #
+ # Encoders also provides methods and constants for the register mechanism
+ # and the [] method that returns the Encoder class belonging to the
+ # given format.
+ module Encoders
+
+ # Raised if Encoders[] fails because:
+ # * a file could not be found
+ # * the requested Encoder is not registered
+ EncoderNotFound = Class.new Exception
+
+ # Loaded Encoders are saved here.
+ ENCODERS = Hash.new do |h, lang|
+ path = Encoders.path_to lang
+ lang = lang.to_sym
+ begin
+ require path
+ rescue LoadError
+ raise EncoderNotFound, "#{path} not found."
+ else
+ # Encoder should have registered by now
+ unless h[lang]
+ raise EncoderNotFound, "No Encoder for #{lang} found in #{path}."
+ end
+ end
+ h[lang]
+ end
+
+ class << self
+
+ # Every Encoder class must register itself for one or more +formats+
+ # by calling register_for, which calls this method.
+ #
+ # See CodeRay::Encoder.register_for.
+ def register encoder_class, *formats
+ for format in formats
+ ENCODERS[format.to_sym] = encoder_class
+ end
+ end
+
+ # Returns the Encoder class stored in ENCODERS for +lang+ (a format
+ # name such as :yaml). A format that is missing from the table goes
+ # through the default block of ENCODERS.
+ #
+ # Example:
+ # require 'coderay'
+ # yaml_encoder = CodeRay::Encoders[:yaml]
+ def [] lang
+ ENCODERS[lang]
+ end
+
+ # Alias for +[]+.
+ alias load []
+
+ # Returns the path to the encoder for format.
+ def path_to plugin
+ File.join 'coderay', 'encoders', "#{plugin}.rb"
+ end
+
+ end
+
+
+ # The Encoder base class. Together with CodeRay::Scanner and
+ # CodeRay::Tokens, it forms the highlighting triad.
+ #
+ # Encoder instances take a Tokens object and do something with it.
+ #
+ # The most common Encoder is surely the HTML encoder
+ # (CodeRay::Encoders::HTML). It highlights the code in a colorful
+ # html page.
+ # If you want the highlighted code in a div or a span instead,
+ # use its subclasses Div and Span.
+ class Encoder
+
+ attr_reader :token_stream
+
+ class << self
+
+ # Register this class for the given formats by passing it, together
+ # with +args+, to Encoders.register.
+ #
+ # Example:
+ # class MyEncoder < CodeRay::Encoders::Encoder
+ # register_for :myenc
+ # ...
+ # end
+ #
+ # See Encoders.register.
+ def register_for *args
+ Encoders.register self, *args
+ end
+
+ # Returns if the Encoder can be used in streaming mode.
+ def streamable?
+ is_a? Streamable
+ end
+
+ # If FILE_EXTENSION isn't defined, this method returns the downcase
+ # class name instead.
+ def const_missing sym
+ if sym == :FILE_EXTENSION
+ sym.to_s.downcase
+ else
+ super
+ end
+ end
+
+ end
+
+ # Subclasses are to store their default options in this constant.
+ DEFAULT_OPTIONS = { :stream => false }
+
+ # The options you gave the Encoder at creating.
+ attr_accessor :options
+
+ # Creates a new Encoder.
+ # +options+ is saved and used for all encode operations, as long as you
+ # don't overwrite it there by passing additional options.
+ #
+ # Encoder objects provide three encode methods:
+ # - encode simply takes a +code+ string and a +lang+
+ # - encode_tokens expects a +tokens+ object instead
+ # - encode_stream is like encode, but uses streaming mode.
+ #
+ # Each method has an optional +options+ parameter. These are added to
+ # the options you passed at creation.
+ def initialize options = {}
+ @options = self.class::DEFAULT_OPTIONS.merge options
+ raise "I am only the basic Encoder class. I can't encode anything. :(\n" +
+ "Use my subclasses." if self.class == Encoder
+ end
+
+ # Encode a Tokens object.
+ def encode_tokens tokens, options = {}
+ options = @options.merge options
+ setup options
+ compile tokens, options
+ finish options
+ end
+
+ # Encode the given +code+ after tokenizing it using the Scanner for
+ # +lang+.
+ def encode code, lang, options = {}
+ options = @options.merge options
+ scanner_options = options.fetch(:scanner_options, {})
+ tokens = CodeRay.scan code, lang, scanner_options
+ encode_tokens tokens, options
+ end
+
+ # You can use highlight instead of encode, if that seems
+ # more clear to you.
+ alias highlight encode
+
+ # Encode the given +code+ using the Scanner for +lang+ in streaming
+ # mode.
+ def encode_stream code, lang, options = {}
+ raise NotStreamableError, self unless kind_of? Streamable
+ options = @options.merge options
+ setup options
+ scanner_options = options.fetch :scanner_options, {}
+ @token_stream = CodeRay.scan_stream code, lang, scanner_options, &self
+ finish options
+ end
+
+ # Behave like a proc, so an Encoder can be passed as a block (&encoder).
+ # The token method is converted to a proc.
+ def to_proc
+ method(:token).to_proc
+ end
+
+ protected
+
+ # Called with merged options before encoding starts.
+ # Sets @out to an empty string; +options+ is not used here.
+ #
+ # See the HTML Encoder for an example of option caching.
+ def setup options
+ @out = ''
+ end
+
+ # Called with +text+ and +kind+ of the currently scanned token.
+ # For simple encoders, it's enough to implement this method.
+ #
+ # Raises a NotImplementedError exception if it is not overwritten in
+ # subclass.
+ def token text, kind
+ raise NotImplementedError, "#{self.class}#token not implemented."
+ end
+
+ # Called with merged options as the last step of encode_tokens and
+ # encode_stream.
+ # The return value is the result of encoding; here it is @out.
+ def finish options
+ @out
+ end
+
+ # Do the encoding: iterate over +tokens+ with this encoder as the block
+ # (see to_proc), so the token method is called for each one.
+ #
+ # The already created +tokens+ object must be used; it can be a
+ # TokenStream or a Tokens object.
+ def compile tokens, options
+ tokens.each(&self)
+ end
+
+ end
+
+ end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb new file mode 100644 index 0000000..80aec57 --- /dev/null +++ b/lib/coderay/encoders/count.rb @@ -0,0 +1,20 @@ +module CodeRay
+module Encoders
+
+ class Count < Encoder
+
+ register_for :count
+
+ protected
+
+ def setup options
+ @out = 0
+ end
+
+ def token text, kind
+ @out += 1
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb new file mode 100644 index 0000000..640df0e --- /dev/null +++ b/lib/coderay/encoders/div.rb @@ -0,0 +1,16 @@ +module CodeRay module Encoders
+
+ require 'coderay/encoders/html'
+ class Div < HTML
+
+ FILE_EXTENSION = 'div.html'
+
+ register_for :div
+
+ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+ :css => :style,
+ :wrap => :div,
+ })
+ end
+
+end end
diff --git a/lib/coderay/encoders/helpers/html_css.rb b/lib/coderay/encoders/helpers/html_css.rb new file mode 100644 index 0000000..f9cadf7 --- /dev/null +++ b/lib/coderay/encoders/helpers/html_css.rb @@ -0,0 +1,168 @@ +module CodeRay module Encoders
+
+ class HTML
+ class CSS
+
+ def initialize stylesheet = TOKENS
+ @classes = Hash.new
+ parse stylesheet
+ end
+
+ def [] *styles
+ cl = @classes[styles.first]
+ return '' unless cl
+ style = false
+ 1.upto(cl.size + 1) do |offset|
+ break if style = cl[styles[offset .. -1]]
+ end
+ return style
+ end
+
+ private
+
+ CSS_CLASS = /
+ ( (?: # $1 = classes
+ \s* \. [-\w]+
+ )+ )
+ \s* \{
+ ( [^\}]* ) # $2 = style
+ \} \s*
+ |
+ ( . ) # $3 = error
+ /mx
+ def parse stylesheet
+ stylesheet.scan CSS_CLASS do |classes, style, error|
+ raise "CSS parse error: '#{error}' not recognized" if error
+ styles = classes.scan(/[-\w]+/)
+ cl = styles.pop
+ @classes[cl] ||= Hash.new
+ @classes[cl][styles] = style.strip
+ end
+ end
+
+ MAIN = <<-'MAIN'
+.code {
+ background-color: #FAFAFA;
+ border: 1px solid #D1D7DC;
+ font-family: 'Courier New', 'Terminal', monospace;
+ font-size: 10pt;
+ color: black;
+ vertical-align: top;
+ text-align: left;
+ padding: 0px;
+}
+span.code { white-space: pre; }
+.code tt { font-weight: bold; }
+.code pre {
+ font-size: 10pt;
+ margin: 0px 5px;
+}
+.code .code_table {
+ margin: 0px;
+}
+.code .line_numbers {
+ margin: 0px;
+ background-color:#DEF; color: #777;
+ vertical-align: top;
+ text-align: right;
+}
+.code .code_cell {
+ width: 100%;
+ background-color:#FAFAFA;
+ color: black;
+ vertical-align: top;
+ text-align: left;
+}
+.code .no {
+ background-color:#DEF;
+ color: #777;
+ padding: 0px 5px;
+ font-weight: normal;
+ font-style: normal;
+}
+
+.code tt { display: hidden; }
+
+ MAIN
+
+ TOKENS = <<-'TOKENS'
+.af { color:#00C; }
+.an { color:#007; }
+.av { color:#700; }
+.aw { color:#C00; }
+.bi { color:#509; font-weight:bold; }
+.c { color:#888; }
+
+.ch { color:#04D; /* background-color:#f0f0ff; */ }
+.ch .k { color:#04D; }
+.ch .dl { color:#039; }
+
+.cl { color:#B06; font-weight:bold; }
+.co { color:#036; font-weight:bold; }
+.cr { color:#0A0; }
+.cv { color:#369; }
+.df { color:#099; font-weight:bold; }
+.di { color:#088; font-weight:bold; }
+.dl { color:black; }
+.do { color:#970; }
+.ds { color:#D42; font-weight:bold; }
+.e { color:#666; font-weight:bold; }
+.er { color:#F00; background-color:#FAA; }
+.ex { color:#F00; font-weight:bold; }
+.fl { color:#60E; font-weight:bold; }
+.fu { color:#06B; font-weight:bold; }
+.gv { color:#d70; font-weight:bold; }
+.hx { color:#058; font-weight:bold; }
+.i { color:#00D; font-weight:bold; }
+.ic { color:#B44; font-weight:bold; }
+.in { color:#B2B; font-weight:bold; }
+.iv { color:#33B; }
+.la { color:#970; font-weight:bold; }
+.lv { color:#963; }
+.oc { color:#40E; font-weight:bold; }
+.on { color:#000; font-weight:bold; }
+.pc { color:#038; font-weight:bold; }
+.pd { color:#369; font-weight:bold; }
+.pp { color:#579; }
+.pt { color:#339; font-weight:bold; }
+.r { color:#080; font-weight:bold; }
+
+.rx { background-color:#fff0ff; }
+.rx .k { color:#808; }
+.rx .dl { color:#404; }
+.rx .mod { color:#C2C; }
+.rx .fu { color:#404; font-weight: bold; }
+
+.s { background-color:#fff0f0; }
+.s .s { background-color:#ffe0e0; }
+.s .s .s { background-color:#ffd0d0; }
+.s .k { color:#D20; }
+.s .dl { color:#710; }
+
+.sh { background-color:#f0fff0; }
+.sh .k { color:#2B2; }
+.sh .dl { color:#161; }
+
+.sy { color:#A60; }
+.sy .k { color:#A60; }
+.sy .dl { color:#630; }
+
+.ta { color:#070; }
+.tf { color:#070; font-weight:bold; }
+.ts { color:#D70; font-weight:bold; }
+.ty { color:#339; font-weight:bold; }
+.v { color:#036; }
+.xt { color:#444; }
+ TOKENS
+
+ DEFAULT_STYLESHEET = MAIN + TOKENS
+
+ end
+ end
+
+end end
+
+if $0 == __FILE__
+ require 'pp'
+ pp CodeRay::Encoders::HTML::CSS.new
+end
diff --git a/lib/coderay/encoders/helpers/html_helper.rb b/lib/coderay/encoders/helpers/html_helper.rb new file mode 100644 index 0000000..03ea0a2 --- /dev/null +++ b/lib/coderay/encoders/helpers/html_helper.rb @@ -0,0 +1,68 @@ +module CodeRay module Encoders
+
+ class HTML
+
+ ClassOfKind = {
+ :attribute_name => 'an',
+ :attribute_name_fat => 'af',
+ :attribute_value => 'av',
+ :attribute_value_fat => 'aw',
+ :bin => 'bi',
+ :char => 'ch',
+ :class => 'cl',
+ :class_variable => 'cv',
+ :color => 'cr',
+ :comment => 'c',
+ :constant => 'co',
+ :content => 'k',
+ :definition => 'df',
+ :delimiter => 'dl',
+ :directive => 'di',
+ :doc => 'do',
+ :doc_string => 'ds',
+ :error => 'er',
+ :escape => 'e',
+ :exception => 'ex',
+ :float => 'fl',
+ :function => 'fu',
+ :global_variable => 'gv',
+ :hex => 'hx',
+ :include => 'ic',
+ :instance_variable => 'iv',
+ :integer => 'i',
+ :interpreted => 'in',
+ :label => 'la',
+ :local_variable => 'lv',
+ :modifier => 'mod',
+ :oct => 'oc',
+ :operator_name => 'on',
+ :pre_constant => 'pc',
+ :pre_type => 'pt',
+ :predefined => 'pd',
+ :preprocessor => 'pp',
+ :regexp => 'rx',
+ :reserved => 'r',
+ :shell => 'sh',
+ :string => 's',
+ :symbol => 'sy',
+ :tag => 'ta',
+ :tag_fat => 'tf',
+ :tag_special => 'ts',
+ :type => 'ty',
+ :variable => 'v',
+ :xml_text => 'xt',
+
+ :ident => :NO_HIGHLIGHT, # 'id'
+ :operator => :NO_HIGHLIGHT, # 'op'
+ :space => :NO_HIGHLIGHT, # 'sp'
+ :plain => :NO_HIGHLIGHT,
+ }
+ ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
+ ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
+ ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
+ ClassOfKind[:escape] = ClassOfKind[:delimiter]
+ ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
+
+ end
+
+end end
diff --git a/lib/coderay/encoders/helpers/html_output.rb b/lib/coderay/encoders/helpers/html_output.rb new file mode 100644 index 0000000..e2b26e7 --- /dev/null +++ b/lib/coderay/encoders/helpers/html_output.rb @@ -0,0 +1,240 @@ +module CodeRay
+ module Encoders
+
+ class HTML
+
+ # This module is included in the output String from the HTML Encoder.
+ #
+ # It provides methods like wrap, div, page etc.
+ #
+ # Remember to use #clone instead of #dup to keep the modules the object was
+ # extended with.
+ #
+ # TODO: more doc.
+ module Output
+
+ class << self
+
+ # This makes Output look like a class.
+ #
+ # Example:
+ #
+ # a = Output.new '<span class="co">Code</span>'
+ # a.wrap! :page
+ def new string, element = nil
+ output = string.clone.extend self
+ output.wrapped_in = element
+ output
+ end
+
+ # Prints a warning (it does not raise) if an object that doesn't respond
+ # to to_str is extended by Output, to prevent users from misuse.
+ # Use Module#remove_method to disable.
+ def extended o
+ warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
+ end
+
+ def page_template_for_css css = :default
+ css = CSS::DEFAULT_STYLESHEET if css == :default
+ PAGE.apply 'CSS', css
+ end
+
+ # Define a new wrapper. This is meta programming.
+ def wrapper *wrappers
+ wrappers.each do |wrapper|
+ define_method wrapper do |*args|
+ wrap wrapper, *args
+ end
+ define_method(:"#{wrapper}!") do |*args|
+ wrap! wrapper, *args
+ end
+ end
+ end
+ end
+
+ wrapper :div, :span, :page
+
+ # The element this output is currently wrapped in, or nil.
+ def wrapped_in
+ @wrapped_in || nil
+ end
+ attr_writer :wrapped_in
+
+ # True if the output is currently wrapped in +element+ (pass nil to ask
+ # whether it is not wrapped at all).
+ def wrapped_in? element
+ wrapped_in == element
+ end
+
+ # Like wrap_in!, but works on a clone and leaves the receiver alone.
+ def wrap_in template
+ clone.wrap_in! template
+ end
+
+ # Puts the receiver in place of the CONTENT placeholder of +template+,
+ # modifying the receiver. Returns self. wrapped_in is not updated here.
+ def wrap_in! template
+ Template.wrap! self, template, 'CONTENT'
+ self
+ end
+
+ def wrap! element, *args
+ return self if not element or element == wrapped_in
+ case element
+ when :div
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+ wrap_in! DIV
+ when :span
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+ wrap_in! SPAN
+ when :page
+ wrap! :div if wrapped_in? nil
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
+ wrap_in! Output.page_template_for_css
+ when nil
+ return self
+ else
+ raise "Unknown value %p for :wrap" % element
+ end
+ @wrapped_in = element
+ self
+ end
+
+ # Like wrap!, but works on a clone and leaves the receiver alone.
+ def wrap *args
+ clone.wrap!(*args)
+ end
+
+ def numerize! mode = :table, options = {}
+ return self unless mode
+
+ offset = options.fetch :line_numbers_offset, DEFAULT_OPTIONS[:line_numbers_offset]
+ unless offset.is_a? Integer
+ raise ArgumentError, "Invalid value %p for :offset; Integer expected." % offset
+ end
+
+ unless NUMERIZABLE_WRAPPINGS.include? options[:wrap]
+ raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
+ end
+
+ bold_every = options.fetch :bold_every, DEFAULT_OPTIONS[:bold_every]
+ bolding =
+ if bold_every == :no_bolding or bold_every == 0
+ proc { |line| line.to_s }
+ elsif bold_every.is_a? Integer
+ proc do |line|
+ if line % bold_every == 0
+ "<strong>#{line}</strong>" # every bold_every-th number in bold
+ else
+ line.to_s
+ end
+ end
+ else
+ raise ArgumentError, "Invalid value %p for :bolding; :no_bolding or Integer expected." % bolding
+ end
+
+ line_count = count("\n")
+ line_count += 1 if self[-1] != ?\n
+
+ case mode
+ when :inline
+ max_width = line_count.to_s.size
+ line = offset - 1
+ gsub!(/^/) do
+ line += 1
+ line_number = bolding.call line
+ "<span class=\"no\">#{ line_number.rjust(max_width) }</span> "
+ end
+ wrap! :div
+
+ when :table
+ # This is really ugly.
+ # Because even monospace fonts seem to have different heights when bold,
+ # I make the newline bold, both in the code and the line numbers.
+ # FIXME Still not working perfect for Mr. Internet Exploder
+ line_numbers = (offset ... offset + line_count).to_a.map(&bolding).join("\n")
+ line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
+ line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
+
+ line_numbers_tpl = DIV_TABLE.apply('LINE_NUMBERS', line_numbers)
+ gsub!(/\n/) { "<tt>\n</tt>" }
+ wrap_in! line_numbers_tpl
+ @wrapped_in = :div
+
+ else
+ raise ArgumentError, "Unknown value %p for mode: :inline or :table expected" % mode
+ end
+
+ self
+ end
+
+ # Like numerize!, but works on a clone and leaves the receiver alone.
+ def numerize *args
+ clone.numerize!(*args)
+ end
+
+ class Template < String
+
+ def self.wrap! str, template, target
+ target = Regexp.new(Regexp.escape("<%#{target}%>"))
+ if template =~ target
+ str[0,0] = $`
+ str << $'
+ else
+ raise "Template target <%%%p%%> not found" % target
+ end
+ end
+
+ def apply target, replacement
+ target = Regexp.new(Regexp.escape("<%#{target}%>"))
+ if self =~ target
+ Template.new($` + replacement + $')
+ else
+ raise "Template target <%%%p%%> not found" % target
+ end
+ end
+
+ module Simple
+ def ` str #`
+ Template.new str
+ end
+ end
+ end
+
+ extend Template::Simple
+
+#-- don't include the templates in docu
+
+ SPAN = `<span class="code"><%CONTENT%></span>`
+
+ DIV, DIV_TABLE, PAGE =
+ <<-`DIV`, <<-`DIV_TABLE`, <<-`PAGE`
+
+<div class="code">
+<pre><%CONTENT%></pre>
+</div>
+ DIV
+
+<div class="code">
+ <table class="code_table">
+ <tr>
+ <td class="line_numbers"><pre><%LINE_NUMBERS%></pre></td>
+ <td class="code_cell"><div class="nowrap"><pre><%CONTENT%></pre></div></td>
+ </tr>
+ </table>
+</div>
+ DIV_TABLE
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-1" />
+ <title>CodeRay HTML Encoder Example</title>
+ <style type="text/css">
+<%CSS%>
+ </style>
+</head>
+<body style="background-color: white;">
+<%CONTENT%>
+</body>
+</html>
+ PAGE
+
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb new file mode 100644 index 0000000..69b6e22 --- /dev/null +++ b/lib/coderay/encoders/html.rb @@ -0,0 +1,167 @@ +module CodeRay
+module Encoders
+
+ class HTML < Encoder
+
+ include Streamable
+ register_for :html
+
+ FILE_EXTENSION = 'html'
+
+ DEFAULT_OPTIONS = {
+ :tab_width => 8,
+
+ :level => :xhtml,
+ :css => :class,
+
+ :wrap => :page,
+ :line_numbers => :table,
+ :line_numbers_offset => 1,
+ :bold_every => 10,
+ }
+ NUMERIZABLE_WRAPPINGS = [:div, :page]
+
+ require 'coderay/encoders/helpers/html_helper'
+ require 'coderay/encoders/helpers/html_output'
+ require 'coderay/encoders/helpers/html_css'
+
+ # Passes all arguments on to Encoder#initialize and clears
+ # @last_options, which setup compares against to decide whether its
+ # cached state can be reused.
+ def initialize(*)
+ super
+ @last_options = nil
+ end
+
+ protected
+
+ HTML_ESCAPE = { #:nodoc:
+ '&' => '&',
+ '"' => '"',
+ '>' => '>',
+ '<' => '<',
+ }
+
+ # This is to prevent illegal HTML.
+ # Strange chars should still be avoided in codes.
+ evil_chars = Array(0x00...0x20) - [?n, ?t]
+ evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
+ ansi_chars = Array(0x7f..0xff)
+ ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
+ # \x9 (\t) and \xA (\n) not included
+ HTML_ESCAPE_PATTERN = /[&"><\0-\x8\xB-\x1f\x7f-\xff]/
+
+ def setup options
+ if options[:line_numbers] and not NUMERIZABLE_WRAPPINGS.include? options[:wrap]
+ warn ':line_numbers wanted, but :wrap is %p' % options[:wrap]
+ end
+ super
+ return if options == @last_options
+ @last_options = options
+
+ @HTML_ESCAPE = HTML_ESCAPE.dup
+ @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
+
+ @opened = [nil]
+ @css = CSS.new
+
+ case options[:css]
+
+ when :class
+ @css_style = Hash.new do |h, k|
+ if k.is_a? Array
+ type = k.first
+ else
+ type = k
+ end
+ c = ClassOfKind[type]
+ if c == :NO_HIGHLIGHT
+ h[k] = false
+ else
+ if options[:debug]
+ debug_info = ' title="%p"' % [ k ]
+ else
+ debug_info = ''
+ end
+ h[k] = '<span%s class="%s">' % [debug_info, c]
+ end
+ end
+
+ when :style
+ @css_style = Hash.new do |h, k|
+ if k.is_a? Array
+ styles = k.dup
+ else
+ styles = [k]
+ end
+ styles.map! { |c| ClassOfKind[c] }
+ if styles.first == :NO_HIGHLIGHT
+ h[k] = false
+ else
+ if options[:debug]
+ debug_info = ' title="%s"' % [ styles.inspect.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } ]
+ else
+ debug_info = ''
+ end
+ style = @css[*styles]
+ h[k] =
+ if style
+ '<span%s style="%s">' % [debug_info, style]
+ else
+ false
+ end
+ end
+ end
+
+ else
+ raise "Unknown value %p for :css." % options[:css]
+
+ end
+ end
+
+ def finish options
+ not_needed = @opened.shift
+ @out << '</span>' * @opened.size
+
+ @out.extend Output
+ @out.numerize! options[:line_numbers], options # if options[:line_numbers]
+ @out.wrap! options[:wrap] # if options[:wrap]
+
+ #require 'pp'
+ #pp @css_style, @css_style.size
+
+ super
+ end
+
+ def token text, type
+ if text.is_a? String
+ # be careful when streaming: text is changed!
+ text.gsub!(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
+ @opened[0] = type
+ style = @css_style[@opened]
+ if style
+ @out << style << text << '</span>'
+ else
+ @out << text
+ end
+ else
+ case text
+ when :open
+ @opened[0] = type
+ @out << @css_style[@opened]
+ @opened << type
+ when :close
+ unless @opened.empty?
+ raise 'Not Token to be closed.' unless @opened.size > 1
+ @out << '</span>'
+ @opened.pop
+ end
+ when nil
+ raise 'Token with nil as text was given: %p' % [[text, type]]
+ else
+ raise 'unknown token kind: %p' % text
+ end
+ end
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb new file mode 100644 index 0000000..67c4987 --- /dev/null +++ b/lib/coderay/encoders/null.rb @@ -0,0 +1,20 @@ +module CodeRay
+ module Encoders
+
+ # An Encoder that ignores every token it is given. It overrides only
+ # token, so setup and finish are the ones inherited from Encoder.
+ class Null < Encoder
+
+ include Streamable
+ register_for :null
+
+ protected
+
+ # Accepts and discards any arguments.
+ def token(*)
+ # do nothing
+ end
+
+ end
+
+ end
+end
+
+
diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb new file mode 100644 index 0000000..a7715f4 --- /dev/null +++ b/lib/coderay/encoders/span.rb @@ -0,0 +1,17 @@ +module CodeRay module Encoders
+
+ require 'coderay/encoders/html'
+ class Span < HTML
+
+ FILE_EXTENSION = 'span.html'
+
+ register_for :span
+
+ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+ :css => :style,
+ :wrap => :span,
+ :line_numbers => nil,
+ })
+ end
+
+end end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb new file mode 100644 index 0000000..0685c03 --- /dev/null +++ b/lib/coderay/encoders/statistic.rb @@ -0,0 +1,74 @@ +module CodeRay module Encoders
+
+ # Makes a statistic for the given tokens.
+ class Statistic < Encoder
+
+ include Streamable
+ register_for :stats, :statistic
+
+ attr_reader :type_stats, :real_token_count
+
+ protected
+
+ TypeStats = Struct.new :count, :size
+
+ # Resets the statistics: @type_stats creates a zeroed TypeStats entry
+ # for every key on first access, and the count of non-space text tokens
+ # starts at 0. Does not call super, so @out is not touched.
+ def setup options
+ @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
+ @real_token_count = 0
+ end
+
+ # Remembers +tokens+ in @tokens and calls super.
+ # NOTE(review): the Encoder base class in encoder.rb defines compile,
+ # not generate, and nothing visible calls this method - confirm whether
+ # it should be named compile or can be removed.
+ def generate tokens, options
+ @tokens = tokens
+ super
+ end
+
+ def token text, type
+ @type_stats['TOTAL'].count += 1
+ if text.is_a? String
+ @real_token_count += 1 unless type == :space
+ @type_stats[type].count += 1
+ @type_stats[type].size += text.size
+ @type_stats['TOTAL'].size += text.size
+ else
+ @content_type = type
+ @type_stats['open/close'].count += 1
+ end
+ end
+
+ STATS = <<-STATS
+
+Code Statistics
+
+Tokens %8d
+ Non-Whitespace %8d
+Bytes Total %8d
+
+Token Types (%d):
+ type count ratio size (average)
+-------------------------------------------------------------
+%s
+ STATS
+# space 12007 33.81 % 1.7
+ TOKEN_TYPES_ROW = <<-TKR
+ %-20s %8d %6.2f %% %5.1f
+ TKR
+
+ def finish options
+ all = @type_stats['TOTAL']
+ all_count, all_size = all.count, all.size
+ @type_stats.each do |type, stat|
+ stat.size /= stat.count.to_f
+ end
+ types_stats = @type_stats.sort_by { |k, v| -v.count }.map do |k, v|
+ TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
+ end.join
+ STATS % [
+ all_count, @real_token_count, all_size,
+ @type_stats.delete_if { |k, v| k.is_a? String }.size,
+ types_stats
+ ]
+ end
+
+ end
+
+end end
diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb new file mode 100644 index 0000000..4f0a754 --- /dev/null +++ b/lib/coderay/encoders/text.rb @@ -0,0 +1,33 @@ +module CodeRay
+ module Encoders
+
+ class Text < Encoder
+
+ include Streamable
+ register_for :text
+
+ FILE_EXTENSION = 'txt'
+
+ DEFAULT_OPTIONS = {
+ :separator => ''
+ }
+
+ protected
+ # Lets Encoder#setup reset @out, then stores the :separator option in
+ # @sep.
+ def setup options
+ super
+ @sep = options[:separator]
+ end
+
+ def token text, kind
+ return unless text.respond_to :to_str
+ @out << text + @sep
+ end
+
+ def finish options
+ @out.chomp @sep
+ end
+
+ end
+
+ end
+end
diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb new file mode 100644 index 0000000..4573307 --- /dev/null +++ b/lib/coderay/encoders/tokens.rb @@ -0,0 +1,44 @@ +module CodeRay
+ module Encoders
+
+ # The Tokens encoder converts the tokens to a simple
+ # readable format. It doesn't use colors and is mainly
+ # intended for console output.
+ #
+ # The tokens are converted with Tokens.write_token.
+ #
+ # The format is:
+ #
+ # <token-kind> \t <escaped token-text> \n
+ #
+ # Example:
+ #
+ # require 'coderay'
+ # puts CodeRay.scan("puts 3 + 4", :ruby).tokens
+ #
+ # prints:
+ #
+ # ident puts
+ # space
+ # integer 3
+ # space
+ # operator +
+ # space
+ # integer 4
+ #
+ class Tokens < Encoder
+
+ include Streamable
+ register_for :tokens
+
+ FILE_EXTENSION = 'tok'
+
+ protected
+ # Appends what CodeRay::Tokens.write_token returns for the given token
+ # arguments to @out.
+ def token *args
+ @out << CodeRay::Tokens.write_token(*args)
+ end
+
+ end
+
+ end
+end
diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb new file mode 100644 index 0000000..4e2b7a1 --- /dev/null +++ b/lib/coderay/encoders/yaml.rb @@ -0,0 +1,19 @@ +module CodeRay
+ module Encoders
+
+ class YAML < Encoder
+
+ register_for :yaml
+
+ FILE_EXTENSION = 'yaml'
+
+ protected
+ # Replaces the token-by-token encoding of Encoder#compile: converts
+ # +tokens+ to an Array and stores its YAML dump in @out.
+ # The yaml library is loaded on first use.
+ def compile tokens, options
+ require 'yaml'
+ @out = tokens.to_a.to_yaml
+ end
+
+ end
+
+ end
+end
diff --git a/lib/coderay/helpers/filetype.rb b/lib/coderay/helpers/filetype.rb new file mode 100644 index 0000000..7f34c35 --- /dev/null +++ b/lib/coderay/helpers/filetype.rb @@ -0,0 +1,145 @@ +# =FileType
+#
+# A simple filetype recognizer
+#
+# Author: murphy (mail to murphy cYcnus de)
+#
+# Version: 0.1 (2005.september.1)
+#
+# ==Documentation
+#
+# TODO
+#
+module FileType
+
+ UnknownFileType = Class.new Exception
+
+ class << self
+
+ def [] filename, read_shebang = false
+ name = File.basename filename
+ ext = File.extname name
+ ext.sub!(/^\./, '') # delete the leading dot
+
+ type =
+ TypeFromExt[ext] ||
+ TypeFromExt[ext.downcase] ||
+ TypeFromName[name] ||
+ TypeFromName[name.downcase]
+ type ||= shebang(filename) if read_shebang
+
+ type
+ end
+
+ def shebang filename
+ begin
+ File.open filename, 'r' do |f|
+ first_line = f.gets
+ first_line[TypeFromShebang]
+ end
+ rescue IOError
+ nil
+ end
+ end
+
+ # This works like Hash#fetch.
+ def fetch filename, default = nil, read_shebang = false
+ if default and block_given?
+ warn 'block supersedes default value argument'
+ end
+
+ unless type = self[filename, read_shebang]
+ return yield if block_given?
+ return default if default
+ raise UnknownFileType, 'Could not determine type of %p.' % filename
+ end
+ type
+ end
+
+ end
+
+ TypeFromExt = {
+ 'rb' => :ruby,
+ 'rbw' => :ruby,
+ 'cpp' => :cpp,
+ 'c' => :c,
+ 'h' => :c,
+ 'xml' => :xml,
+ 'htm' => :html,
+ 'html' => :html,
+ }
+
+ TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
+
+ TypeFromName = {
+ 'Rakefile' => :ruby,
+ 'Rantfile' => :ruby,
+ }
+
+end
+
+if $0 == __FILE__
+ $VERBOSE = true
+ eval DATA.read, nil, $0, __LINE__+4
+end
+
+__END__
+
+require 'test/unit'
+
+class TC_FileType < Test::Unit::TestCase
+
+ def test_fetch
+ assert_raise FileType::UnknownFileType do
+ FileType.fetch ''
+ end
+
+ assert_throws :not_found do
+ FileType.fetch '.' do
+ throw :not_found
+ end
+ end
+
+ assert_equal :default, FileType.fetch('c', :default)
+
+ stderr, fake_stderr = $stderr, Object.new
+ $err = ''
+ def fake_stderr.write x
+ $err << x
+ end
+ $stderr = fake_stderr
+ FileType.fetch('c', :default) { }
+ assert_equal "block supersedes default value argument\n", $err
+ $stderr = stderr
+ end
+
+ def test_ruby
+ assert_equal :ruby, FileType['test.rb']
+ assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw']
+ assert_equal :ruby, FileType['/usr/bin/something/Rakefile']
+ assert_equal :ruby, FileType['~/myapp/gem/Rantfile']
+ assert_not_equal :ruby, FileType['test_rb']
+ assert_not_equal :ruby, FileType['Makefile']
+ assert_not_equal :ruby, FileType['set.rb/set']
+ assert_not_equal :ruby, FileType['~/projects/blabla/rb']
+ end
+
+ def test_c
+ assert_equal :c, FileType['test.c']
+ assert_equal :c, FileType['C:\\Program Files\\x\\y\\c\\test.h']
+ assert_not_equal :c, FileType['test_c']
+ assert_not_equal :c, FileType['Makefile']
+ assert_not_equal :c, FileType['set.h/set']
+ assert_not_equal :c, FileType['~/projects/blabla/c']
+ end
+
+ def test_shebang
+ dir = './test'
+ if File.directory? dir
+ Dir.chdir dir do
+ assert_equal :c, FileType['test.c']
+ end
+ end
+ end
+
+end
diff --git a/lib/coderay/helpers/gzip_simple.rb b/lib/coderay/helpers/gzip_simple.rb new file mode 100644 index 0000000..02d1ffd --- /dev/null +++ b/lib/coderay/helpers/gzip_simple.rb @@ -0,0 +1,123 @@ +# =GZip Simple
+#
+# A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
+#
+# Author: murphy (mail to murphy cYcnus de)
+#
+# Version: 0.2 (2005.may.28)
+#
+# ==Documentation
+#
+# See +GZip+ module and the +String+ extensions.
+#
+module GZip
+
+ require 'zlib'
+
+ # The default zipping level. 7 zips good and fast.
+ DEFAULT_GZIP_LEVEL = 7
+
+ # Unzips the given string +s+.
+ #
+ # Example:
+ # require 'gzip_simple'
+ # print GZip.gunzip(File.read('adresses.gz'))
+ #
+ def GZip.gunzip s
+ Zlib::Inflate.inflate s
+ end
+
+ # Zips the given string +s+.
+ #
+ # Example:
+ # require 'gzip_simple'
+ # File.open('adresses.gz', 'w') do |file
+ # file.write GZip.gzip('Mum: 0123 456 789', 9)
+ # end
+ #
+ # If you provide a +level+, you can control how strong
+ # the string is compressed:
+ # - 0: no compression, only convert to gzip format
+ # - 1: compress fast
+ # - 7: compress more, but still fast (default)
+ # - 8: compress more, slower
+ # - 9: compress best, very slow
+ def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
+ Zlib::Deflate.new(level).deflate s, Zlib::FINISH
+ end
+end
+
+# String extensions to use the GZip module.
+#
+# The methods gzip and gunzip provide an even more simple
+# interface to the ZLib:
+#
+# # create a big string
+# x = 'a' * 1000
+#
+# # zip it
+# x_gz = x.gzip
+#
+# # test the result
+# puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size]
+# #-> Zipped 1000 bytes to 19 bytes.
+#
+# # unzipping works
+# p x_gz.gunzip == x #-> true
+class String
+ # Returns the string, unzipped.
+ # See GZip.gunzip
+ def gunzip
+ GZip.gunzip self
+ end
+ # Replaces the string with its unzipped value.
+ # See GZip.gunzip
+ def gunzip!
+ replace gunzip
+ end
+
+ # Returns the string, zipped.
+ # +level+ is the gzip compression level, see GZip.gzip.
+ def gzip level = GZip::DEFAULT_GZIP_LEVEL
+ GZip.gzip self, level
+ end
+ # Replaces the string with its zipped value.
+ # See GZip.gzip.
+ def gzip!(*args)
+ replace gzip(*args)
+ end
+end
+
+if $0 == __FILE__
+ eval DATA.read, nil, $0, __LINE__+4
+end
+
+__END__
+#CODE
+
+# Testing / Benchmark
+x = 'a' * 1000
+x_gz = x.gzip
+puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size] #-> Zipped 1000 bytes to 19 bytes.
+p x_gz.gunzip == x #-> true
+
+require 'benchmark'
+
+INFO = 'packed to %0.3f%%' # :nodoc:
+
+x = Array.new(100000) { rand(255).chr + 'aaaaaaaaa' + rand(255).chr }.join
+Benchmark.bm(10) do |bm|
+ for level in 0..9
+ bm.report "zip #{level}" do
+ $x = x.gzip level
+ end
+ puts INFO % [100.0 * $x.size / x.size]
+ end
+ bm.report 'zip' do
+ $x = x.gzip
+ end
+ puts INFO % [100.0 * $x.size / x.size]
+ bm.report 'unzip' do
+ $x.gunzip
+ end
+end
diff --git a/lib/coderay/helpers/scanner_helper.rb b/lib/coderay/helpers/scanner_helper.rb new file mode 100644 index 0000000..a2e14bb --- /dev/null +++ b/lib/coderay/helpers/scanner_helper.rb @@ -0,0 +1,63 @@ +module CodeRay
+module Scanners
+
+ class Scanner
+
+ # A WordList is a Hash with some additional features.
+ # It is intended to be used for keyword recognition.
+ class WordList < Hash
+
+ def initialize default = false, case_mode = :case_match
+ @case_ignore =
+ case case_mode
+ when :case_match then false
+ when :case_ignore then true
+ else
+ raise ArgumentError,
+ "#{self.class.name}.new: second argument must be :case_ignore or :case_match, but #{case_mode} was given."
+ end
+
+ if @case_ignore
+ super() do |h, k|
+ h[k] = h.fetch k.downcase, default
+ end
+ else
+ super default
+ end
+ end
+
+ def include? word
+ self[word] if @case_ignore
+ has_key? word
+ end
+
+ def add words, kind = true
+ words.each do |word|
+ self[mind_case(word)] = kind
+ end
+ self
+ end
+
+ alias words keys
+
+ def case_ignore?
+ @case_mode
+ end
+
+ private
+ def mind_case word
+ if @case_ignore
+ word.downcase
+ else
+ word.dup
+ end
+ end
+
+ end
+
+ end
+
+end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb new file mode 100644 index 0000000..1cca607 --- /dev/null +++ b/lib/coderay/scanner.rb @@ -0,0 +1,298 @@ +module CodeRay
+
+ # This module holds class Scanner and its subclasses.
+ # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
+ # and can be found in coderay/scanners/ruby.
+ #
+ # Scanner also provides methods and constants for the register mechanism
+ # and the [] method that returns the Scanner class belonging to the
+ # given lang.
+ module Scanners
+
+ # Raised if Scanners[] fails because:
+ # * a file could not be found
+ # * the requested Scanner is not registered
+ ScannerNotFound = Class.new(Exception)
+
+ # Loaded Scanners are saved here.
+ SCANNERS = Hash.new { |h, lang|
+ raise ScannerNotFound, "No scanner for #{lang} found."
+ }
+
+ class << self
+
+ # Registers a scanner class by setting SCANNERS[lang].
+ #
+ # Typically used in Scanners, for example in the Ruby scanner:
+ #
+ # register_for :ruby
+ def register scanner_class, *langs
+ for lang in langs
+ raise ArgumentError, 'lang must be a Symbol, but it was a %s' % lang.class unless lang.is_a? Symbol
+ SCANNERS[lang] = scanner_class
+ end
+ end
+
+ # Loads the scanner class for +lang+ and returns it.
+ #
+ # Example:
+ #
+ # Scanners[:xml].new
+ #
+ # +lang+ is converted using +normalize+ and must be
+ # * a String containing only alphanumeric characters (\w+)
+ # * a Symbol
+ #
+ # Strings are converted to lowercase symbols (so +'C'+ and +'c'+ load the
+ # same scanner, namely the one registered for +:c+.)
+ #
+ # If the scanner isn't registered yet, it is searched.
+ # CodeRay expects that the scanner class is defined in
+ #
+ # <install-dir>/coderay/scanners/<lang>.rb
+ #
+ # (See path_to.)
+ #
+ # If the file isn't found, a ScannerNotFound exception is raised
+ #
+ # The scanner should register itself using +register+. If the scanner is
+ # still not found (because it has not registered itself, or has registered under another lang),
+ # a ScannerNotFound exception is raised.
+ def [] lang
+ lang = normalize lang
+
+ # fetch does not use the default block of SCANNERS, so a missing entry
+ # runs the block below instead of raising right away.
+ SCANNERS.fetch lang do
+ scanner_file = path_to lang
+
+ begin
+ require scanner_file
+ rescue LoadError
+ raise ScannerNotFound, "File #{scanner_file} not found."
+ end
+
+ # The required file is expected to have registered the scanner by now.
+ SCANNERS.fetch lang do
+ raise ScannerNotFound, <<-ERR
No scanner for #{lang} found in #{scanner_file}.
Known scanners: #{SCANNERS}
+ ERR
+ end
+ end
+ end
+
+ # Alias for +[]+.
+ alias load []
+
+ # Calculates the path where a scanner for +lang+
+ # is expected to be. This is:
+ #
+ # <install-dir>/coderay/scanners/<lang>.rb
+ def path_to lang
+ File.join 'coderay', 'scanners', "#{lang}.rb"
+ end
+
+ # Returns an array of all filenames in the scanners/ folder.
+ # The extension +.rb+ is not included.
+ def languages
+ scanners = File.join File.dirname(__FILE__), 'scanners', '*.rb'
+ Dir[scanners].map do |file|
+ File.basename file, '.rb'
+ end
+ end
+
+ # Loads all scanners that +languages+ finds using +load+.
+ def load_all
+ for lang in languages
+ load lang
+ end
+ end
+
+ # Converts +lang+ to a downcase Symbol if it is a String,
+ # or returns +lang+ if it already is a Symbol.
+ #
+ # Raises +ArgumentError+ for all other objects, or if the
+ # given String includes non-alphanumeric characters (\W).
+ def normalize lang
+ if lang.is_a? Symbol
+ lang
+ elsif lang.is_a? String
+ if lang[/\w+/] == lang
+ lang[/\w+/].downcase.to_sym
+ else
+ raise ArgumentError, "Invalid lang: '#{lang}' given."
+ end
+ elsif lang.nil?
+ :plaintext
+ else
+ raise ArgumentError, "String or Symbol expected, but #{lang.class} given."
+ end
+ end
+
+ end
+
+
+ require 'strscan'
+ # The base class for all Scanners.
+ #
+ # It is a subclass of Ruby's great +StringScanner+, which
+ # makes it easy to access the scanning methods inside.
+ #
+ # It is also +Enumerable+, so you can do this:
+ #
+ # require 'coderay'
+ #
+ # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
+ #
+ # for text, kind in c_scanner
+ # puts text if kind == :operator
+ # end
+ #
+ # # prints: (*==)++;
+ #
+ # OK, this is not a very good example :)
+ # You can also use map, any?, find and even sort_by.
+ class Scanner < StringScanner
+
+ # Raised if a Scanner fails while scanning
+ ScanError = Class.new(Exception)
+
+ require 'coderay/helpers/scanner_helper'
+
+ # The default options for all scanner classes.
+ #
+ # Subclasses can define their own DEFAULT_OPTIONS constant; Scanner.new reads self.class::DEFAULT_OPTIONS.
+ DEFAULT_OPTIONS = { :stream => false }
+
+ class << self
+ # Register the scanner class for all
+ # +langs+.
+ #
+ # See Scanners.register.
+ def register_for *langs
+ Scanners.register self, *langs
+ end
+
+ # Returns if the Scanner can be used in streaming mode.
+ def streamable?
+ is_a? Streamable
+ end
+
+ end
+
+=begin
+ ## Excluded for speed reasons - protected seems to make methods slow.
+
+ # Save the StringScanner methods from being called.
+ # This would not be useful for highlighting.
+ strscan_public_methods = StringScanner.instance_methods - StringScanner.ancestors[1].instance_methods
+ protected(*strscan_public_methods)
+=end
+ # Creates a new Scanner.
+ #
+ # * +code+ is the input String and is handled by the superclass StringScanner.
+ # * +options+ is a Hash with Symbols as keys.
+ # It is merged with the default options of the class (you can overwrite
+ # default options here.)
+ # * +block+ is the callback for streamed highlighting.
+ #
+ # If you set :stream to +true+ in the options, the Scanner uses a
+ # TokenStream with the +block+ as callback to handle the tokens.
+ #
+ # Else, a Tokens object is used.
+ def initialize code, options = {}, &block
+ @options = self.class::DEFAULT_OPTIONS.merge options
+ raise "I am only the basic Scanner class. I can't scan anything. :(\n" +
+ "Use my subclasses." if self.class == Scanner
+
+ # I love this hack. It seems to silence all dos/unix/mac newline problems.
+ super code.gsub(/\r\n?/, "\n")
+
+ if @options[:stream]
+ warn "warning in CodeRay::Scanner.new: :stream is set, but no block was given" unless block_given?
+ raise NotStreamableError, self unless kind_of? Streamable
+ @tokens = TokenStream.new(&block)
+ else
+ warn "warning in CodeRay::Scanner.new: Block given, but :stream is #{@options[:stream]}" if block_given?
+ @tokens = Tokens.new
+ end
+ end
+
+ # More mnemonic accessor name for the input string.
+ alias code string
+
+ # Scans the code and returns the tokens: @tokens in stream mode, else
+ # the (cached) result of scan_tokens.
+ #
+ # NOTE(review): the +options+ argument is currently ignored; the first
+ # line replaces it with a copy of @options (the trailing "#options"
+ # suggests the merge was disabled on purpose). Confirm before relying on it.
+ def tokenize options = {}
+ options = @options.merge({}) #options
+ if @options[:stream] # :stream must have been set already
+ reset ## what is this for?
+ scan_tokens @tokens, options
+ @tokens
+ else
+ # scanned only once; later calls return the cached result
+ @cached_tokens ||= scan_tokens @tokens, options
+ end
+ end
+
+ # you can also see this as a read-only attribute
+ alias tokens tokenize
+
+ # Traverses the tokens.
+ def each &block
+ raise ArgumentError, 'Cannot traverse TokenStream.' if @options[:stream]
+ tokens.each(&block)
+ end
+ include Enumerable
+
+ # The current line position of the scanner.
+ #
+ # Beware, this is implemented inefficiently. It should be used
+ # for debugging only.
+ def line
+ string[0..pos].count("\n") + 1
+ end
+
+ protected
+
+ # This is the central method, and often the only one a subclass implements.
+ #
+ # Subclasses must implement this method; it must return +tokens+ and must only
+ # use Tokens#<< for storing scanned tokens.
+ def scan_tokens tokens, options
+ raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
+ end
+
+ # Scanner error with additional status information
+ def raise_inspect msg, tokens, ambit = 30
+ raise ScanError, <<-EOE % [
+
+
+***ERROR in %s: %s
+
+tokens:
+%s
+
+current line: %d pos = %d
+matched: %p
+bol? = %p, eos? = %p
+
+surrounding code:
+%p ~~ %p
+
+
+***ERROR***
+
+ EOE
+ File.basename(caller[0]),
+ msg,
+ tokens.last(10).map { |t| t.inspect }.join("\n"),
+ line, pos,
+ matched, bol?, eos?,
+ string[pos-ambit,ambit],
+ string[pos,ambit],
+ ]
+ end
+
+ end
+
+ end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb new file mode 100644 index 0000000..3420822 --- /dev/null +++ b/lib/coderay/scanners/c.rb @@ -0,0 +1,147 @@ +module CodeRay module Scanners
+
+ class C < Scanner
+
+ register_for :c
+
+ RESERVED_WORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
+ 'for', 'goto', 'if', 'return', 'switch', 'while',
+ 'struct', 'union', 'enum', 'typedef',
+ 'static', 'register', 'auto', 'extern',
+ 'sizeof',
+ 'volatile', 'const', # C89
+ 'inline', 'restrict', # C99
+ ]
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char', 'void',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # C99
+ ]
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # C99
+ ]
+
+ IDENT_KIND = Scanner::WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_TYPES, :pre_type).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+ # Scans C code and appends [text, kind] pairs to +tokens+.
+ #
+ # A small state machine is used: :initial for ordinary code, :string
+ # inside a string literal (wrapped in [:open, :string] and
+ # [:close, :string] markers) and :include_expected right after an
+ # #include directive.
+ #
+ # +options+ is not used. Returns +tokens+. Input that cannot be
+ # tokenized is reported through raise_inspect.
+ def scan_tokens tokens, options
+
+   state = :initial
+
+   until eos?
+
+     kind = :error
+     match = nil
+
+     if state == :initial
+
+       if scan(/ \s+ | \\\n /x)
+         kind = :space
+
+       elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+         kind = :comment
+
+       elsif match = scan(/ \# \s* if \s* 0 /x)
+         # everything up to the matching directive is treated as a comment
+         match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
+         kind = :comment
+
+       elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
+         kind = :operator
+
+       elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+         kind = IDENT_KIND[match]
+         if kind == :ident and check(/:(?!:)/)
+           match << scan(/:/)
+           kind = :label
+         end
+
+       elsif match = scan(/L?"/)
+         tokens << [:open, :string]
+         if match[0] == ?L
+           tokens << ['L', :modifier]
+           match = '"'
+         end
+         state = :string
+         kind = :delimiter
+
+       elsif scan(/#\s*(\w*)/)
+         kind = :preprocessor # FIXME multiline preprocs
+         state = :include_expected if self[1] == 'include'
+
+       elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+         kind = :char
+
+       elsif scan(/0[xX][0-9A-Fa-f]+/)
+         kind = :hex
+
+       elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+         kind = :oct
+
+       elsif scan(/(?:\d+)(?![.eEfF])/)
+         kind = :integer
+
+       elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+         kind = :float
+
+       else
+         getch
+       end
+
+     elsif state == :string
+       if scan(/[^\\"]+/)
+         kind = :content
+       elsif scan(/"/)
+         tokens << ['"', :delimiter]
+         tokens << [:close, :string]
+         state = :initial
+         next
+       elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+         kind = :char
+       elsif scan(/ \\ | $ /x)
+         kind = :error
+         state = :initial
+       else
+         raise_inspect("else case \" reached; %p not handled." % peek(1), tokens)
+       end
+
+     elsif state == :include_expected
+       if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+         kind = :include
+         state = :initial
+
+       elsif match = scan(/\s+/)
+         kind = :space
+         state = :initial if match.index ?\n
+
+       else
+         getch
+
+       end
+
+     else
+       raise_inspect 'else-case reached', tokens
+
+     end
+
+     match ||= matched
+     # Kernel#raise does not accept an Array or a tokens argument, so the
+     # error is reported through raise_inspect.
+     raise_inspect('Error token %p' % [[match, kind]], tokens) if kind == :error
+
+     tokens << [match, kind]
+
+   end
+
+   tokens
+ end
+
+ end
+
+end end
diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb new file mode 100644 index 0000000..4c03147 --- /dev/null +++ b/lib/coderay/scanners/delphi.rb @@ -0,0 +1,123 @@ +module CodeRay module Scanners
+
+ class Delphi < Scanner
+
+ register_for :delphi
+
+ RESERVED_WORDS = [
+ 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
+ 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
+ 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
+ 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
+ 'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
+ 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
+ 'procedure', 'program', 'property', 'raise', 'record', 'repeat',
+ 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
+ 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
+ 'xor', 'on'
+ ]
+
+ DIRECTIVES = [
+ 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
+ 'contains', 'deprecated', 'dispid', 'dynamic', 'export',
+ 'external', 'far', 'forward', 'implements', 'local',
+ 'near', 'nodefault', 'on', 'overload', 'override',
+ 'package', 'pascal', 'platform', 'private', 'protected', 'public',
+ 'published', 'read', 'readonly', 'register', 'reintroduce',
+ 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
+ 'virtual', 'write', 'writeonly'
+ ]
+
+ IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
+ add(RESERVED_WORDS, :reserved).
+ add(DIRECTIVES, :directive)
+
+ def scan_tokens tokens, options
+
+ state = :initial
+
+ until eos?
+
+ kind = :error
+ match = nil
+
+ if state == :initial
+
+ if scan(/ \s+ /x)
+ kind = :space
+
+ elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+ kind = :preprocessor
+
+ elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+ kind = :comment
+
+ elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+ kind = :operator
+
+ elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+ kind = IDENT_KIND[match]
+
+ elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+ tokens << [:open, :char]
+ tokens << ["'", :delimiter]
+ tokens << [self[1], :content]
+ tokens << ["'", :delimiter]
+ tokens << [:close, :char]
+ next
+
+ elsif match = scan(/ ' /x)
+ tokens << [:open, :string]
+ state = :string
+ kind = :delimiter
+
+ elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+ kind = :char
+
+ elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+ kind = :hex
+
+ elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+ kind = :integer
+
+ elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+ kind = :float
+
+ else
+ getch
+ end
+
+ elsif state == :string
+ if scan(/[^\n']+/)
+ kind = :content
+ elsif scan(/''/)
+ kind = :char
+ elsif scan(/'/)
+ tokens << ["'", :delimiter]
+ tokens << [:close, :string]
+ state = :initial
+ next
+ elsif scan(/\n/)
+ state = :initial
+ else
+ raise "else case \' reached; %p not handled." % peek(1), tokens
+ end
+
+ else
+ raise 'else-case reached', tokens
+
+ end
+
+ match ||= matched
+ raise [match, kind], tokens if kind == :error
+
+ tokens << [match, kind]
+
+ end
+
+ tokens
+ end
+
+ end
+
+end end
diff --git a/lib/coderay/scanners/helpers/ruby_helper.rb b/lib/coderay/scanners/helpers/ruby_helper.rb new file mode 100644 index 0000000..241b392 --- /dev/null +++ b/lib/coderay/scanners/helpers/ruby_helper.rb @@ -0,0 +1,212 @@ +module CodeRay module Scanners
+
+ class Ruby
+
+ RESERVED_WORDS = %w[
+ and def end in or unless begin
+ defined? ensure module redo super until
+ BEGIN break do next rescue then
+ when END case else for retry
+ while alias class elsif if not return
+ undef yield
+ ]
+
+ DEF_KEYWORDS = %w[ def ]
+ MODULE_KEYWORDS = %w[class module]
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+
+ IDENTS_ALLOWING_REGEXP = %w[
+ and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(IDENTS_ALLOWING_REGEXP, :set)
+
+ PREDEFINED_CONSTANTS = %w[
+ nil true false self
+ DATA ARGV ARGF __FILE__ __LINE__
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+# IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/
+ IDENT = /[a-z_][\w_]*/i
+
+ METHOD_NAME = / #{IDENT} [?!]? /ox
+ METHOD_NAME_EX = /
+ #{IDENT}[?!=]? # common methods: split, foo=, empty?, gsub!
+ | \*\*? # multiplication and power
+ | [-+]@? # plus, minus
+ | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
+ | \[\]=? # array getter and setter
+ | << | >> # append or shift left, shift right
+ | <=?>? | >=? # comparison, rocket operator
+ | ===? # simple equality and case equality
+ /ox
+ INSTANCE_VARIABLE = / @ #{IDENT} /ox
+ CLASS_VARIABLE = / @@ #{IDENT} /ox
+ OBJECT_VARIABLE = / @@? #{IDENT} /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+ PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
+ VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
+
+ QUOTE_TO_TYPE = {
+ '`' => :shell,
+ '/'=> :regexp,
+ }
+ QUOTE_TO_TYPE.default = :string
+
+ REGEXP_MODIFIERS = /[mixounse]*/
+ REGEXP_SYMBOLS = /
+ [|?*+?(){}\[\].^$]
+ /x
+
+ DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox
+ FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox
+ NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | #{FLOAT_OR_INT} /ox
+
+ SYMBOL = /
+ :
+ (?:
+ #{METHOD_NAME_EX}
+ | #{PREFIX_VARIABLE}
+ | ['"]
+ )
+ /ox
+
+ # TODO investigate \M, \c and \C escape sequences
+ # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
+ # assert_equal(225, ?\M-a)
+ # assert_equal(129, ?\M-\C-a)
+ ESCAPE = /
+ [abefnrstv]
+ | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
+ | [0-7]{1,3}
+ | x[0-9A-Fa-f]{1,2}
+ | .
+ /mx
+
+ CHARACTER = /
+ \?
+ (?:
+ [^\s\\]
+ | \\ #{ESCAPE}
+ )
+ /mx
+
+ # NOTE: This is not completely correct, but
+ # nobody needs heredoc delimiters ending with \n.
+ HEREDOC_OPEN = /
+ << (-)? # $1 = float
+ (?:
+ ( [A-Za-z_0-9]+ ) # $2 = delim
+ |
+ ( ["'`] ) # $3 = quote, type
+ ( [^\n]*? ) \3 # $4 = delim
+ )
+ /mx
+
+ RDOC = /
+ =begin (?!\S)
+ .*?
+ (?: \Z | ^=end (?!\S) [^\n]* )
+ /mx
+
+ DATA = /
+ __END__$
+ .*?
+ (?: \Z | (?=^\#CODE) )
+ /mx
+
+ RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
+
+ FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox
+
+ FancyStringType = {
+ 'q' => [:string, false],
+ 'Q' => [:string, true],
+ 'r' => [:regexp, true],
+ 's' => [:symbol, false],
+ 'x' => [:shell, true],
+ 'w' => [:string, :word],
+ 'W' => [:string, :word],
+ }
+ FancyStringType['w'] = FancyStringType['q']
+ FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
+
+ # Holds what the scanner needs to know while it is inside a string-like
+ # literal: its type, whether it is interpreted, its delimiter, whether it
+ # is a heredoc, the nesting paren with its depth, and the pattern used to
+ # find the next interesting position.
+ class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
+ :paren, :paren_depth, :pattern
+
+ # Maps an opening paren to its closing counterpart.
+ CLOSING_PAREN = Hash[ *%w[
+ ( )
+ [ ]
+ < >
+ { }
+ ] ]
+
+ CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
+ # Maps a closing paren back to its opening counterpart.
+ OPENING_PAREN = CLOSING_PAREN.invert
+
+ # Builds and memoizes, per [delim, interpreted] key, a lookahead pattern
+ # for the next special position in a string: the delimiter (and, for a
+ # closing paren, its opener), a backslash and, for interpreted strings
+ # whose delimiter is not '#', a '#' followed by '{', '$' or '@'.
+ STRING_PATTERN = Hash.new { |h, k|
+ delim, interpreted = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ if starter = OPENING_PAREN[delim]
+ delim_pattern << Regexp.escape(starter)
+ end
+
+
+ # extra alternatives; nil unless interpreted is one of these two symbols
+ special_escapes =
+ case interpreted
+ when :regexp_symbols
+ '| ' + REGEXP_SYMBOLS.source
+ when :words
+ '| \s'
+ end
+
+ h[k] =
+ if interpreted and not delim == '#'
+ / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+ else
+ / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+ end
+ }
+
+ # Like STRING_PATTERN, but for heredocs; the key is
+ # [delim, interpreted, indented]. The terminator is matched as a newline,
+ # optional indentation (only if indented) and the delimiter at the end of
+ # the line; the empty group after it is set only when the terminator matched.
+ HEREDOC_PATTERN = Hash.new { |h, k|
+ delim, interpreted, indented = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
+ h[k] =
+ if interpreted
+ / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx
+ else
+ / (?= #{delim_pattern}() | \\ ) /mx
+ end
+ }
+
+ # Creates the state for a literal of type +kind+ opened with +delim+.
+ # +heredoc+ is false, or a truthy value where :indented selects the
+ # pattern that allows leading whitespace before the terminator.
+ def initialize kind, interpreted, delim, heredoc = false
+ # For paren delimiters, delim becomes the closing paren and paren the
+ # opening one; nesting starts at depth 1.
+ if paren = CLOSING_PAREN[delim]
+ delim, paren = paren, delim
+ paren_depth = 1
+ end
+ if heredoc
+ pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
+ delim = nil
+ else
+ pattern = STRING_PATTERN[ [delim, interpreted] ]
+ end
+ super kind, interpreted, delim, heredoc, paren, paren_depth, pattern
+ end
+ end unless defined? StringState
+
+ end
+
+end end
diff --git a/lib/coderay/scanners/mush.rb b/lib/coderay/scanners/mush.rb new file mode 100644 index 0000000..5217ed9 --- /dev/null +++ b/lib/coderay/scanners/mush.rb @@ -0,0 +1,102 @@ +module CodeRay module Scanners
+
+ class Mush < Scanner
+
+ register_for :mush
+
+ RESERVED_WORDS = [
+ ]
+
+ IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
+ add(RESERVED_WORDS, :reserved).
+ add(DIRECTIVES, :directive)
+
+ def scan_tokens tokens, options
+
+ state = :initial
+
+ until eos?
+
+ kind = :error
+ match = nil
+
+ if state == :initial
+
+ if scan(/ \s+ /x)
+ kind = :space
+
+ elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+ kind = :preprocessor
+
+ elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+ kind = :comment
+
+ elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+ kind = :operator
+
+ elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+ kind = IDENT_KIND[match]
+
+ elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+ tokens << [:open, :char]
+ tokens << ["'", :delimiter]
+ tokens << [self[1], :content]
+ tokens << ["'", :delimiter]
+ tokens << [:close, :char]
+ next
+
+ elsif match = scan(/ ' /x)
+ tokens << [:open, :string]
+ state = :string
+ kind = :delimiter
+
+ elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+ kind = :char
+
+ elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+ kind = :hex
+
+ elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+ kind = :integer
+
+ elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+ kind = :float
+
+ else
+ getch
+ end
+
+ elsif state == :string
+ if scan(/[^\n']+/)
+ kind = :content
+ elsif scan(/''/)
+ kind = :char
+ elsif scan(/'/)
+ tokens << ["'", :delimiter]
+ tokens << [:close, :string]
+ state = :initial
+ next
+ elsif scan(/\n/)
+ state = :initial
+ else
+ raise "else case \' reached; %p not handled." % peek(1), tokens
+ end
+
+ else
+ raise 'else-case reached', tokens
+
+ end
+
+ match ||= matched
+ raise [match, kind], tokens if kind == :error
+
+ tokens << [match, kind]
+
+ end
+
+ tokens
+ end
+
+ end
+
+end end
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb new file mode 100644 index 0000000..0aebf35 --- /dev/null +++ b/lib/coderay/scanners/plaintext.rb @@ -0,0 +1,13 @@ +module CodeRay module Scanners
+
+ class Plaintext < Scanner
+
+ register_for :plaintext, :plain
+
+ def scan_tokens tokens, options
+ tokens << [scan_until(/\z/), :plain]
+ end
+
+ end
+
+end end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb new file mode 100644 index 0000000..433726b --- /dev/null +++ b/lib/coderay/scanners/ruby.rb @@ -0,0 +1,333 @@ +module CodeRay module Scanners
+
+ # This scanner is really complex, since Ruby _is_ a complex language!
+ #
+ # It tries to highlight 100% of all common code,
+ # and 90% of strange codes.
+ #
+ # It is optimized for HTML highlighting, and is not very useful for
+ # parsing or pretty printing.
+ #
+ # For now, I think it's better than the scanners in VIM or Syntax, or
+ # any highlighter I was able to find, except Caleb's RubyLexer.
+ #
+ # I hope it's also better than the rdoc/irb lexer.
+ class Ruby < Scanner
+
+ include Streamable
+
+ register_for :ruby
+
+ require 'coderay/scanners/helpers/ruby_helper'
+
+ DEFAULT_OPTIONS = {
+ :parse_regexps => true,
+ }
+
+ private
+ def scan_tokens tokens, options
+ parse_regexp = false # options[:parse_regexps]
+ first_bake = saved_tokens = nil
+ last_token_dot = false
+ fancy_allowed = regexp_allowed = true
+ heredocs = nil
+ last_state = nil
+ state = :initial
+ depth = nil
+ states = []
+
+ until eos?
+ type = :error
+ match = nil
+ kind = nil
+
+ if state.instance_of? StringState
+# {{{
+
+ match = scan_until(state.pattern) || scan_until(/\z/)
+ tokens << [match, :content] unless match.empty?
+ break if eos?
+
+ if state.heredoc and self[1]
+ match = getch + scan_until(/$/)
+ tokens << [match, :delimiter]
+ tokens << [:close, state.type]
+ state = :initial
+ next
+ end
+
+ case match = getch
+
+ when state.delim
+ if state.paren
+ state.paren_depth -= 1
+ if state.paren_depth > 0
+ tokens << [match, :nesting_delimiter]
+ next
+ end
+ end
+ tokens << [match, :delimiter]
+ if state.type == :regexp and not eos?
+ modifiers = scan(/#{REGEXP_MODIFIERS}/ox)
+ tokens << [modifiers, :modifier] unless modifiers.empty?
+ if parse_regexp
+ extended = modifiers.index ?x
+ tokens, regexp = saved_tokens, tokens
+ for text, type in regexp
+ if text.is_a? String
+ case type
+ when :content
+ text.scan(/([^#]+)|(#.*)/) do |plain, comment|
+ if plain
+ tokens << [plain, :content]
+ else
+ tokens << [comment, :comment]
+ end
+ end
+ when :character
+ if text[/\\(?:[swdSWDAzZbB]|\d+)/]
+ tokens << [text, :modifier]
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ end
+ first_bake = saved_tokens = nil
+ end
+ end
+ tokens << [:close, state.type]
+ fancy_allowed = regexp_allowed = false
+ state = :initial
+
+ when '\\'
+ if state.interpreted
+ if esc = scan(/ #{ESCAPE} /ox)
+ tokens << [match + esc, :char]
+ else
+ tokens << [match, :error]
+ end
+ else
+ case m = getch
+ when state.delim, '\\'
+ tokens << [match + m, :char]
+ else
+ tokens << [match + m, :content]
+ end
+ end
+
+ when '#'
+ case peek(1)[0]
+ when ?{
+ states.push [state, depth, heredocs]
+ fancy_allowed = regexp_allowed = true
+ state, depth = :initial, 1
+ tokens << [match + getch, :escape]
+ when ?$, ?@
+ tokens << [match, :escape]
+ last_state = state # scan one token as normal code, then return here
+ state = :initial
+ else
+ raise "else-case # reached; #%p not handled" % peek(1), tokens
+ end
+
+ when state.paren
+ state.paren_depth += 1
+ tokens << [match, :nesting_delimiter]
+
+ when REGEXP_SYMBOLS
+ tokens << [match, :function]
+
+ else
+ raise "else-case \" reached; %p not handled, state = %p" % [match, state], tokens
+
+ end
+ next
+# }}}
+ else
+# {{{
+ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
+ ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) )
+ fancy_allowed = true
+ case m = match[0]
+ when ?\s, ?\t, ?\f
+ match << scan(/\s*/) unless eos? or heredocs
+ type = :space
+ when ?\n, ?\\
+ type = :space
+ regexp_allowed = m == ?\n
+ if heredocs
+ unscan # heredoc scanning needs \n at start
+ state = heredocs.shift
+ tokens << [:open, state.type]
+ heredocs = nil if heredocs.empty?
+ next
+ else
+ match << scan(/\s*/) unless eos?
+ end
+ when ?#, ?=, ?_
+ type = :comment
+ regexp_allowed = true
+ else
+ raise "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens
+ end
+ tokens << [match, type]
+ next
+
+ elsif state == :initial
+ if match = scan(/ \.\.?\.? | [-+*=>;,|&!\(\)\[\]~^]+ | [\{\}] | :: /x)
+ if match !~ / [.\)\]\}] \z/x or match =~ /\.\.\.?/
+ regexp_allowed = fancy_allowed = :set
+ end
+ last_token_dot = :set if match == '.' or match == '::'
+ type = :operator
+ unless states.empty?
+ case match
+ when '{'
+ depth += 1
+ when '}'
+ depth -= 1
+ if depth == 0
+ state, depth, heredocs = *states.pop
+ type = :escape
+ end
+ end
+ end
+
+ elsif match = scan(/#{METHOD_NAME}/o)
+ if last_token_dot
+ type = if match[/^[A-Z]/] then :constant else :ident end
+ else
+ type = IDENT_KIND[match]
+ if type == :ident and match[/^[A-Z]/]
+ type = :constant
+ elsif type == :reserved
+ state = DEF_NEW_STATE[match]
+ end
+ end
+ fancy_allowed = regexp_allowed = REGEXP_ALLOWED[match]
+
+ elsif match = scan(/ ['"] /mx)
+ tokens << [:open, :string]
+ type = :delimiter
+ state = StringState.new :string, match != '\'', match.dup # important for streaming
+
+ elsif match = scan(/#{INSTANCE_VARIABLE}/o)
+ type = :instance_variable
+
+ elsif regexp_allowed and match = scan(/ \/ /mx)
+ tokens << [:open, :regexp]
+ type = :delimiter
+ interpreted = true
+ state = StringState.new :regexp, interpreted, match.dup
+ if parse_regexp
+ tokens, saved_tokens = [], tokens
+ end
+
+ elsif match = scan(/#{NUMERIC}/o)
+ type = if match[/#{FLOAT}/o] then :float else :integer end
+
+ elsif fancy_allowed and match = scan(/#{SYMBOL}/o)
+ case match[1]
+ when ?', ?"
+ tokens << [:open, :symbol]
+ state = StringState.new :symbol, match[1] == ?", match[1,1]
+ end
+ type = :symbol
+
+ elsif fancy_allowed and match = scan(/#{HEREDOC_OPEN}/o)
+ indented, quote = self[1] == '-', self[3]
+ delim = self[quote ? 4 : 2]
+ type = QUOTE_TO_TYPE[quote]
+ tokens << [:open, type]
+ tokens << [match, :delimiter]
+ match = :close
+ heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
+ heredocs ||= [] # create heredocs if empty
+ heredocs << heredoc
+
+ elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
+ type, interpreted = *FancyStringType.fetch(self[1]) do
+ raise 'Unknown fancy string: %%%p' % k, tokens
+ end
+ tokens << [:open, type]
+ state = StringState.new type, interpreted, self[2]
+ type = :delimiter
+
+ elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
+ type = :integer
+
+ elsif match = scan(/ [\/%<?:] /x)
+ regexp_allowed = fancy_allowed = :set
+ type = :operator
+
+ elsif match = scan(/`/)
+ if last_token_dot
+ type = :operator
+ else
+ tokens << [:open, :shell]
+ type = :delimiter
+ state = StringState.new :shell, true, '`'
+ end
+
+ elsif match = scan(/#{GLOBAL_VARIABLE}/o)
+ type = :global_variable
+
+ elsif match = scan(/#{CLASS_VARIABLE}/o)
+ type = :class_variable
+
+ else
+ match = getch
+
+ end
+
+ elsif state == :def_expected
+ if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox)
+ type = :method
+ else
+ match = getch
+ end
+ state = :initial
+
+ elsif state == :module_expected
+ if match = scan(/<</)
+ type = :operator
+ else
+ if match = scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
+ type = :class
+ else
+ match = getch
+ end
+ end
+ state = :initial
+
+ end
+
+ regexp_allowed = regexp_allowed == :set
+ fancy_allowed = fancy_allowed == :set
+ last_token_dot = last_token_dot == :set
+
+ if $DEBUG
+ raise_inspect 'error token %p in line %d' % [tokens.last, line], tokens if not type or type == :error
+ end
+
+ tokens << [match, type]
+
+ if last_state
+ state = last_state
+ last_state = nil
+ end
+# }}}
+ end
+ end
+
+ tokens
+ end
+ end
+
+end end
+# vim:fdm=marker
diff --git a/lib/coderay/scanners/rubyfast.rb b/lib/coderay/scanners/rubyfast.rb new file mode 100644 index 0000000..baff382 --- /dev/null +++ b/lib/coderay/scanners/rubyfast.rb @@ -0,0 +1,287 @@ +module CodeRay module Scanners
+
+ class Ruby < Scanner
+
+ register_for :rubyfast
+
+ RESERVED_WORDS = [ # Ruby keywords; IDENT_KIND below tags them :reserved
+ 'and', 'def', 'end', 'in', 'or', 'unless', 'begin',
+ 'defined?', 'ensure', 'module', 'redo', 'super', 'until',
+ 'BEGIN', 'break', 'do', 'next', 'rescue', 'then',
+ 'when', 'END', 'case', 'else', 'for', 'retry',
+ 'while', 'alias', 'class', 'elsif', 'if', 'not', 'return',
+ 'undef', 'yield',
+ ]
+ # Scanner state to enter after a keyword; scan_tokens reads it with DEF_NEW_STATE[word].
+ DEF_KEYWORDS = ['def']
+ MODULE_KEYWORDS = ['class', 'module']
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+ # Keywords after which a slash starts a regexp: scan_tokens stores REGEXP_ALLOWED[word] in regexp_allowed.
+ WORDS_ALLOWING_REGEXP = [
+ 'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when'
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(WORDS_ALLOWING_REGEXP, :set)
+ # Pseudo-constants; IDENT_KIND below tags them :pre_constant.
+ PREDEFINED_CONSTANTS = [
+ 'nil', 'true', 'false', 'self',
+ 'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__',
+ ]
+ # Token kind per identifier; presumably :ident is what WordList returns for unlisted words (confirm in WordList).
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/ # a letter or underscore, then letters, digits or underscores
+ # METHOD_NAME is scanned in ordinary code; METHOD_NAME_EX also accepts operator names and is used after def and in symbols.
+ METHOD_NAME = / #{IDENT} [?!]? /xo
+ METHOD_NAME_EX = /
+ #{IDENT}[?!=]? # common methods: split, foo=, empty?, gsub!
+ | \*\*? # multiplication and power
+ | [-+~]@? # plus, minus
+ | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system`
+ | \[\]=? # array getter and setter
+ | <=?>? | >=? # comparison, rocket operator
+ | << | >> # append or shift left, shift right
+ | ===? # simple equality and case equality
+ /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+ # Quoted strings; the closing quote is optional, so an unterminated literal still matches.
+ DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /mox
+ SINGLEQ = / ' [^'\\]* (?: \\. [^'\\]* )* '? /mox
+ STRING = / #{SINGLEQ} | #{DOUBLEQ} /ox
+ # Backtick commands and slash-delimited regexps, built like DOUBLEQ with a different delimiter.
+ SHELL = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /mox
+ REGEXP =%r! / [^/\#\\]* (?: (?: \#\{.*?\} | \#(?:$/)? | \\. ) [^/\#\\]* )* /? !mox
+
+ DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ # leading zero; digit groups may be separated by underscores
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+ # FLOAT is written with layout spaces and therefore needs the x flag; without it the spaces had to match literally.
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox
+ INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/ # alternatives are tried in this order
+
+ ESCAPE_STRING = /
+ % (?!\s)
+ (?:
+ [qsw]
+ (?:
+ \( [^\)\\]* (?: \\. [^\)\\]* )* \)?
+ |
+ \[ [^\]\\]* (?: \\. [^\]\\]* )* \]?
+ |
+ \{ [^\}\\]* (?: \\. [^\}\\]* )* \}?
+ |
+ \< [^\>\\]* (?: \\. [^\>\\]* )* \>?
+ |
+ \\ [^\\ ]* \\?
+ |
+ ( [^a-zA-Z0-9] ) # $1
+ (?:(?!\1)[^\\])* (?: \\. (?:(?!\1)[^\#\\])* )* \1?
+ )
+ |
+ [QrxWr]?
+ (?:
+ \( [^\)\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\)\#\\]* )* \)?
+ |
+ \[ [^\]\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\]\#\\]* )* \]?
+ |
+ \{ [^\}\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\}\#\\]* )* \}?
+ |
+ \< [^\>\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\>\#\\]* )* \>?
+ |
+ \# [^\# \\]* (?: \\. [^\# \\]* )* \#?
+ |
+ \\ [^\\\# ]* (?: (?:\#\{.*?\}|\# ) [^\\\# ]* )* \\?
+ |
+ ( [^a-zA-Z0-9] ) # $2
+ (?:(?!\2)[^\#\\])* (?: (?:\#\{.*?\}|\#|\\.) (?:(?!\2)[^\#\\])* )* \2?
+ )
+ )
+ /mox
+
+ SYMBOL = /
+ :
+ (?:
+ #{GLOBAL_VARIABLE}
+ | @@?#{IDENT}
+ | #{METHOD_NAME_EX}
+ | #{STRING}
+ )/ox # a colon followed by a variable name, a method or operator name, or a quoted string
+ # A here-document opener together with its body; when no terminator line is found, the rest of the input is taken.
+ HEREDOC = /
+ << (?! [\dc] )
+ (?: [^\n]*? << )?
+ (?:
+ ([a-zA-Z_0-9]+)
+ (?: .*? ^\1$ | .* )
+ |
+ -([a-zA-Z_0-9]+)
+ (?: .*? ^\s*\2$ | .* )
+ |
+ (["\'`]) (.*?) \3
+ (?: .*? ^\4$ | .* )
+ |
+ - (["\'`]) (.*?) \5
+ (?: .*? ^\s*\6$ | .* )
+ )
+ /mx
+ # A =begin block comment; the closing =end line is optional, so an unterminated block still matches.
+ RDOC = /
+ =begin (?!\S) [^\n]* \n?
+ (?:
+ (?! =end (?!\S) )
+ [^\n]* \n?
+ )*
+ (?:
+ =end (?!\S) [^\n]*
+ )?
+ /mx
+
+ DATA = /
+ __END__\n
+ (?:
+ (?=\#CODE)
+ |
+ .*
+ )
+ /
+
+ private
+ def scan_tokens tokens, options
+
+ state = :initial
+ regexp_allowed = true
+ last_token_dot = false
+
+ until eos?
+ match = nil
+ kind = :error
+
+ if scan(/\s+/) # in every state
+ kind = :space
+ regexp_allowed = :set if regexp_allowed or matched.index(?\n) # delayed flag setting
+
+ elsif scan(/ \#[^\n]* /x) # in every state
+ kind = :comment
+ regexp_allowed = :set if regexp_allowed
+
+ elsif state == :initial
+ # IDENTIFIERS, KEYWORDS
+ if scan(GLOBAL_VARIABLE)
+ kind = :global_variable
+ elsif scan(/ @@ #{IDENT} /ox)
+ kind = :class_variable
+ elsif scan(/ @ #{IDENT} /ox)
+ kind = :instance_variable
+ elsif scan(/ #{DATA} | #{RDOC} /ox)
+ kind = :comment
+ elsif scan(METHOD_NAME)
+ match = matched
+ if last_token_dot
+ kind =
+ if match[/^[A-Z]/]
+ :constant
+ else
+ :ident
+ end
+ else
+ kind = IDENT_KIND[match]
+ if kind == :ident and match[/^[A-Z]/]
+ kind = :constant
+ elsif kind == :reserved
+ state = DEF_NEW_STATE[match]
+ regexp_allowed = REGEXP_ALLOWED[match]
+ end
+ end
+
+ elsif scan(STRING)
+ kind = :string
+ elsif scan(SHELL)
+ kind = :shell
+ elsif scan(HEREDOC)
+ kind = :string
+ elsif check(/\//) and regexp_allowed
+ scan(REGEXP)
+ kind = :regexp
+ elsif scan(ESCAPE_STRING)
+ match = matched
+ kind =
+ case match[0]
+ when ?s
+ :symbol
+ when ?r
+ :regexp
+ when ?x
+ :shell
+ else
+ :string
+ end
+
+ elsif scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
+ kind = :symbol
+ elsif scan(/
+ \? (?:
+ [^\s\\]
+ |
+ \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] )
+ )
+ /mx)
+ kind = :integer
+
+ elsif scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x)
+ kind = :operator
+ match = matched
+ regexp_allowed = :set if match[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/
+ last_token_dot = :set if match == '.' or match == '::'
+ elsif scan(FLOAT)
+ kind = :float
+ elsif scan(INTEGER)
+ kind = :integer
+ else
+ getch
+ end
+
+ elsif state == :def_expected
+ if scan(/ (?:#{IDENT}::)* (?:#{IDENT}\.)? #{METHOD_NAME_EX} /ox)
+ kind = :method
+ else
+ getch
+ end
+ state = :initial
+
+ elsif state == :module_expected
+ if scan(/<</)
+ kind = :operator
+ else
+ if scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
+ kind = :method
+ else
+ getch
+ end
+ state = :initial
+ end
+
+ end
+
+ text = match || matched
+
+ if kind == :regexp and not eos?
+ text << scan(/[eimnosux]*/)
+ end
+
+ regexp_allowed = (regexp_allowed == :set) # delayed flag setting
+ last_token_dot = last_token_dot == :set
+
+ tokens << [text, kind]
+ end
+
+ tokens
+ end
+ end
+
+end end
diff --git a/lib/coderay/scanners/rubylex.rb b/lib/coderay/scanners/rubylex.rb new file mode 100644 index 0000000..2e69d39 --- /dev/null +++ b/lib/coderay/scanners/rubylex.rb @@ -0,0 +1,102 @@ +require 'rubygems'
+require_gem 'rubylexer'
+require 'rubylexer.rb'
+
+module CodeRay module Scanners
+
+ class RubyLex < Scanner
+
+ register_for :rubylex
+
+ class FakeFile < String
+
+ def initialize(*)
+ super
+ @pos = 0
+ end
+
+ attr_accessor :pos
+
+ def read x
+ pos = @pos
+ @pos += x
+ self[pos ... @pos]
+ end
+
+ def getc
+ pos = @pos
+ @pos += 1
+ self[pos]||-1
+ end
+
+ def eof?
+ @pos == size
+ end
+
+ def each_byte
+ until eof?
+ yield getc
+ end
+ end
+
+ def method_missing meth, *args
+ raise NoMethodError, '%s%s' % [meth, args]
+ end
+
+ end
+
+ private
+ Translate = { # RubyLexer token name (class name minus the Token suffix, lower-cased) => CodeRay token kind
+ :ignore => :comment,
+ :varname => :ident,
+ :number => :integer,
+ :ws => :space,
+ :escnl => :space,
+ :keyword => :reserved,
+ :methname => :method,
+ :renderexactlystring => :regexp,
+ :string => :string,
+ } # names missing here are used unchanged by scan_tokens (Translate.fetch kind, kind)
+
+ def scan_tokens tokens, options # tokenizes the code with RubyLexer, appends [text, kind] pairs to +tokens+ and returns it
+ require 'tempfile'
+ Tempfile.open('~coderay_tempfile') do |file| # RubyLexer is handed a file object, so the code goes through a temp file
+ file.binmode
+ file.write code # +code+ is not defined in this class; presumably the scanner's input - confirm in Scanner
+ file.rewind
+ lexer = RubyLexer.new 'code', file
+ loop do
+ begin
+ tok = lexer.get1token
+ rescue => kaboom
+ err = <<-EOE
+ ERROR!!!
+#{kaboom.inspect}
+#{kaboom.backtrace.join("\n")}
+ EOE
+ tokens << [err, :error]
+ Kernel.raise # re-raises the current exception after recording it as an :error token
+ end
+ break if tok.is_a? EoiToken
+ next if tok.is_a? FileAndLineToken
+ kind = tok.class.name[/(.*?)Token$/,1].downcase.to_sym # class name minus the Token suffix, lower-cased
+ kind = Translate.fetch kind, kind # names without a translation are kept as they are
+ text = tok.ident
+ case kind
+ when :hereplaceholder
+ text = tok.ender
+ kind = :string
+ when :herebody, :outlinedherebody
+ text = tok.ident.ident
+ kind = :string
+ end
+ text = text.inspect unless text.is_a? String
+ p tok if kind == :error # was "p token", an undefined name that raised NameError for every error token
+ tokens << [text.dup, kind]
+ end
+ end
+ tokens
+ end
+ end
+
+end end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb new file mode 100644 index 0000000..71ad33a --- /dev/null +++ b/lib/coderay/tokens.rb @@ -0,0 +1,302 @@ +module CodeRay
+
+ # The Tokens class represents a list of tokens returned from
+ # a Scanner.
+ #
+ # A token is not a special object, just a two-element Array
+ # consisting of
+ # * the _token_ _text_ (the original source of the token in a String)
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
+ #
+ # A token looks like this:
+ #
+ # ['# It looks like this', :comment]
+ # ['3.1415926', :float]
+ # ['äöü', :error]
+ #
+ # Some scanners also yield some kind of sub-tokens, represented by special
+ # token texts, namely :open and :close .
+ #
+ # The Ruby scanner, for example, splits "a string" into:
+ #
+ # [
+ # [:open, :string],
+ # ['"', :delimiter],
+ # ['a string', :content],
+ # ['"', :delimiter],
+ # [:close, :string]
+ # ]
+ #
+ # Tokens is also the interface between Scanners and Encoders:
+ # The input is split and saved into a Tokens object. The Encoder
+ # then builds the output from this object.
+ #
+ # Thus, the syntax below becomes clear:
+ #
+ # CodeRay.scan('price = 2.59', :ruby).html
+ # # the Tokens object is here -------^
+ #
+ # See how small it is? ;)
+ #
+ # Tokens gives you the power to handle pre-scanned code very easily:
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
+ # that you put in your DB.
+ #
+ # Tokens' subclass TokenStream allows streaming to save memory.
+ class Tokens < Array
+
+ class << self
+
+ # Convert the token to a string.
+ #
+ # This format is used by Encoders.Tokens.
+ # It can be reverted using read_token.
+ def write_token text, type
+ if text.is_a? String
+ "#{type}\t#{escape(text)}\n"
+ else
+ ":#{text}\t#{type}\t\n"
+ end
+ end
+
+ # Read a token from the string.
+ #
+ # Inversion of write_token.
+ #
+ # TODO Test this!
+ def read_token token
+ type, text = token.split("\t", 2)
+ if type[0] == ?:
+ [text.to_sym, type[1..-1].to_sym]
+ else
+ [type.to_sym, unescape(text)]
+ end
+ end
+
+ # Escapes a string for use in write_token.
+ def escape text
+ text.gsub(/[\n\\]/, '\\\\\&')
+ end
+
+ # Unescapes a string created by escape.
+ def unescape text
+ text.gsub(/\\[\n\\]/) { |m| m[1,1] }
+ end
+
+ end
+
+ # Whether the object is a TokenStream.
+ #
+ # Returns false; the TokenStream subclass overrides this to return true.
+ def stream?
+ false
+ end
+
+ alias :orig_each :each
+ # Iterates over all tokens.
+ #
+ # If a filter is given, only tokens of that kind are yielded.
+ def each kind_filter = nil, &block
+ unless kind_filter
+ orig_each(&block)
+ else
+ orig_each do |text, kind|
+ next unless kind == kind_filter
+ yield text, kind
+ end
+ end
+ end
+
+ # Iterates over all text tokens.
+ # Range tokens like [:open, :string] are left out.
+ #
+ # Example:
+ # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
+ def each_text_token
+ orig_each do |text, kind|
+ next unless text.respond_to? :to_str
+ yield text, kind
+ end
+ end
+
+ # Encode the tokens using encoder.
+ #
+ # encoder can be
+ # * a symbol like :html oder :statistic
+ # * an Encoder class
+ # * an Encoder object
+ #
+ # options are passed to the encoder.
+ def encode encoder, options = {}
+ unless encoder.is_a? Encoders::Encoder
+ unless encoder.is_a? Class
+ encoder_class = Encoders[encoder]
+ end
+ encoder = encoder_class.new options
+ end
+ encoder.encode_tokens self, options
+ end
+
+ # Redirects unknown methods to encoder calls.
+ #
+ # For example, if you call +tokens.html+, the HTML encoder
+ # is used to highlight the tokens.
+ def method_missing meth, options = {}
+ Encoders[meth].new(options).encode_tokens self
+ end
+
+ # Returns the tokens compressed by joining consecutive
+ # tokens of the same kind.
+ #
+ # This can not be undone, but should yield the same output
+ # in most Encoders. It basically makes the output smaller.
+ #
+ # Combined with dump, it saves database space.
+ def optimize
+ last_kind, last_text = nil, nil
+ new = self.class.new
+ each do |text, kind|
+ if text.is_a? String
+ if kind == last_kind
+ last_text << text
+ else
+ new << [last_text, last_kind] if last_kind
+ last_text = text
+ last_kind = kind
+ end
+ else
+ new << [last_text, last_kind] if last_kind
+ last_kind, last_text = nil, nil
+ new << [text, kind]
+ end
+ end
+ new << [last_text, last_kind] if last_kind
+ new
+ end
+
+ # Optimizes the object itself by replacing its contents with the result of optimize.
+ def optimize!
+ replace optimize
+ end
+
+ # Dumps the object into a String that can be saved
+ # in files or databases.
+ #
+ # The dump is created with Marshal.dump;
+ # In addition, it is gzipped using GZip.gzip.
+ #
+ # The returned String object includes Undumping
+ # so it has an #undump method. See Tokens.load.
+ #
+ # You can configure the level of compression,
+ # but the default value 7 should be what you want
+ # in most cases as it is a good comprimise between
+ # speed and compression rate.
+ #
+ # See GZip module.
+ def dump gzip_level = 7
+ require 'coderay/helpers/gzip_simple'
+ dump = Marshal.dump self
+ dump = dump.gzip gzip_level
+ dump.extend Undumping
+ end
+
+ # The total size of the tokens;
+ # Should be equal to the input size before
+ # scanning.
+ def text_size
+ map { |t, k| t }.join.size
+ end
+
+ # Include this module to give an object an #undump
+ # method.
+ #
+ # Tokens#dump extends the String it returns with Undumping.
+ module Undumping
+ # Calls Tokens.load with itself.
+ def undump
+ Tokens.load self
+ end
+ end
+
+ # Undump the object using Marshal.load, then
+ # unzip it using GZip.gunzip.
+ #
+ # The result is commonly a Tokens object, but
+ # this is not guaranteed.
+ def Tokens.load dump
+ require 'coderay/helpers/gzip_simple'
+ dump = dump.gunzip
+ @dump = Marshal.load dump
+ end
+
+ end
+
+
+ # The TokenStream class is a fake Array without elements.
+ #
+ # It redirects the method << to a block given at creation.
+ #
+ # This allows scanners and Encoders to use streaming (no
+ # tokens are saved, the input is highlighted the same time it
+ # is scanned) with the same code.
+ #
+ # See CodeRay.encode_stream and CodeRay.scan_stream
+ class TokenStream < Tokens
+
+ # Whether the object is a TokenStream.
+ #
+ # Returns true.
+ def stream?
+ true
+ end
+
+ # The Array is empty, but size counts the tokens given by <<.
+ attr_reader :size
+
+ # Creates a new TokenStream that calls +block+ whenever
+ # its << method is called.
+ #
+ # Example:
+ #
+ # require 'coderay'
+ #
+ # token_stream = CodeRay::TokenStream.new do |kind, text|
+ # puts 'kind: %s, text size: %d.' % [kind, text.size]
+ # end
+ #
+ # token_stream << [:regexp, '/\d+/']
+ # #-> kind: rexpexp, text size: 5.
+ #
+ def initialize &block
+ raise ArgumentError, 'Block expected for streaming.' unless block
+ @callback = block
+ @size = 0
+ end
+
+ # Calls +block+ with +token+ and increments size.
+ def << token
+ @callback.call token
+ @size += 1
+ end
+
+ # This method is not implemented due to speed reasons. Use Tokens.
+ def text_size
+ raise NotImplementedError, 'This method is not implemented due to speed reasons.'
+ end
+
+ # A TokenStream cannot be dumped. Use Tokens.
+ def dump
+ raise NotImplementedError, 'A TokenStream cannot be dumped.'
+ end
+
+ # A TokenStream cannot be compacted. Use Tokens.
+ def compact
+ raise NotImplementedError, 'A TokenStream cannot be compacted.'
+ end
+
+ end
+
+end
+
+# vim:sw=2:ts=2:et:tw=78
|