From 7bb2aef0553091a10c197e302475c9f14de8a860 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 11 Jul 2006 05:40:21 +0000 Subject: rake test now runs in debug mode. All .rb files converted to UNIX format (where did the \r come from?) --- lib/coderay/duo.rb | 58 +-- lib/coderay/encoder.rb | 346 ++++++++-------- lib/coderay/encoders/_map.rb | 16 +- lib/coderay/encoders/count.rb | 42 +- lib/coderay/encoders/debug.rb | 92 ++--- lib/coderay/encoders/div.rb | 40 +- lib/coderay/encoders/html/classes.rb | 146 +++---- lib/coderay/encoders/html/css.rb | 130 +++--- lib/coderay/encoders/html/numerization.rb | 244 +++++------ lib/coderay/encoders/html/output.rb | 390 +++++++++--------- lib/coderay/encoders/null.rb | 52 +-- lib/coderay/encoders/page.rb | 42 +- lib/coderay/encoders/span.rb | 40 +- lib/coderay/encoders/statistic.rb | 162 ++++---- lib/coderay/encoders/text.rb | 66 +-- lib/coderay/encoders/tokens.rb | 88 ++-- lib/coderay/encoders/xml.rb | 142 +++---- lib/coderay/encoders/yaml.rb | 44 +- lib/coderay/scanners/plaintext.rb | 30 +- lib/coderay/scanners/ruby/patterns.rb | 432 ++++++++++---------- lib/coderay/scanners/xml.rb | 36 +- lib/coderay/style.rb | 40 +- lib/coderay/tokens.rb | 644 +++++++++++++++--------------- rake_tasks/test.rake | 2 +- 24 files changed, 1662 insertions(+), 1662 deletions(-) diff --git a/lib/coderay/duo.rb b/lib/coderay/duo.rb index 3125568..0e5956e 100644 --- a/lib/coderay/duo.rb +++ b/lib/coderay/duo.rb @@ -1,29 +1,29 @@ -module CodeRay - - # = Duo - # - # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $ - # - # TODO: Doc. - class Duo - - attr_accessor :scanner, :encoder - - def initialize lang, format, options = {} - @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options) - @encoder = CodeRay.encoder format, options - end - - class << self - alias [] new - end - - def encode code - @scanner.string = code - @encoder.encode_tokens(scanner.tokenize) - end - alias highlight encode - - end - -end +module CodeRay + + # = Duo + # + # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $ + # + # TODO: Doc. + class Duo + + attr_accessor :scanner, :encoder + + def initialize lang, format, options = {} + @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options) + @encoder = CodeRay.encoder format, options + end + + class << self + alias [] new + end + + def encode code + @scanner.string = code + @encoder.encode_tokens(scanner.tokenize) + end + alias highlight encode + + end + +end diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb index 1065a9c..221cd58 100644 --- a/lib/coderay/encoder.rb +++ b/lib/coderay/encoder.rb @@ -1,173 +1,173 @@ -module CodeRay - - # This module holds the Encoder class and its subclasses. - # For example, the HTML encoder is named CodeRay::Encoders::HTML - # can be found in coderay/encoders/html. - # - # Encoders also provides methods and constants for the register - # mechanism and the [] method that returns the Encoder class - # belonging to the given format. - module Encoders - extend PluginHost - plugin_path File.dirname(__FILE__), 'encoders' - - # = Encoder - # - # The Encoder base class. Together with Scanner and - # Tokens, it forms the highlighting triad. - # - # Encoder instances take a Tokens object and do something with it. - # - # The most common Encoder is surely the HTML encoder - # (CodeRay::Encoders::HTML). It highlights the code in a colorful - # html page. - # If you want the highlighted code in a div or a span instead, - # use its subclasses Div and Span. - class Encoder - extend Plugin - plugin_host Encoders - - attr_reader :token_stream - - class << self - - # Returns if the Encoder can be used in streaming mode. - def streamable? - is_a? Streamable - end - - # If FILE_EXTENSION isn't defined, this method returns the - # downcase class name instead. - def const_missing sym - if sym == :FILE_EXTENSION - sym.to_s.downcase - else - super - end - end - - end - - # Subclasses are to store their default options in this constant. - DEFAULT_OPTIONS = { :stream => false } - - # The options you gave the Encoder at creating. - attr_accessor :options - - # Creates a new Encoder. - # +options+ is saved and used for all encode operations, as long - # as you don't overwrite it there by passing additional options. - # - # Encoder objects provide three encode methods: - # - encode simply takes a +code+ string and a +lang+ - # - encode_tokens expects a +tokens+ object instead - # - encode_stream is like encode, but uses streaming mode. - # - # Each method has an optional +options+ parameter. These are - # added to the options you passed at creation. - def initialize options = {} - @options = self.class::DEFAULT_OPTIONS.merge options - raise "I am only the basic Encoder class. I can't encode "\ - "anything. :( Use my subclasses." if self.class == Encoder - end - - # Encode a Tokens object. - def encode_tokens tokens, options = {} - options = @options.merge options - setup options - compile tokens, options - finish options - end - - # Encode the given +code+ after tokenizing it using the Scanner - # for +lang+. - def encode code, lang, options = {} - options = @options.merge options - scanner_options = CodeRay.get_scanner_options(options) - tokens = CodeRay.scan code, lang, scanner_options - encode_tokens tokens, options - end - - # You can use highlight instead of encode, if that seems - # more clear to you. - alias highlight encode - - # Encode the given +code+ using the Scanner for +lang+ in - # streaming mode. - def encode_stream code, lang, options = {} - raise NotStreamableError, self unless kind_of? Streamable - options = @options.merge options - setup options - scanner_options = CodeRay.get_scanner_options options - @token_stream = - CodeRay.scan_stream code, lang, scanner_options, &self - finish options - end - - # Behave like a proc. The token method is converted to a proc. - def to_proc - method(:token).to_proc - end - - # Return the default file extension for outputs of this encoder. - def file_extension - self.class::FILE_EXTENSION - end - - protected - - # Called with merged options before encoding starts. - # Sets @out to an empty string. - # - # See the HTML Encoder for an example of option caching. - def setup options - @out = '' - end - - # Called with +text+ and +kind+ of the currently scanned token. - # For simple scanners, it's enougth to implement this method. - # - # By default, it calls text_token or block_token, depending on - # whether +text+ is a String. - def token text, kind - if text.is_a? ::String - text_token text, kind - elsif text.is_a? ::Symbol - block_token text, kind - else - raise 'Unknown token text type: %p' % text - end - end - - def text_token text, kind - end - - def block_token action, kind - case action - when :open - open_token kind - when :close - close_token kind - else - raise 'unknown block action: %p' % action - end - end - - # Called with merged options after encoding starts. - # The return value is the result of encoding, typically @out. - def finish options - @out - end - - # Do the encoding. - # - # The already created +tokens+ object must be used; it can be a - # TokenStream or a Tokens object. - def compile tokens, options - tokens.each(&self) - end - - end - - end -end +module CodeRay + + # This module holds the Encoder class and its subclasses. + # For example, the HTML encoder is named CodeRay::Encoders::HTML + # can be found in coderay/encoders/html. + # + # Encoders also provides methods and constants for the register + # mechanism and the [] method that returns the Encoder class + # belonging to the given format. + module Encoders + extend PluginHost + plugin_path File.dirname(__FILE__), 'encoders' + + # = Encoder + # + # The Encoder base class. Together with Scanner and + # Tokens, it forms the highlighting triad. + # + # Encoder instances take a Tokens object and do something with it. + # + # The most common Encoder is surely the HTML encoder + # (CodeRay::Encoders::HTML). It highlights the code in a colorful + # html page. + # If you want the highlighted code in a div or a span instead, + # use its subclasses Div and Span. + class Encoder + extend Plugin + plugin_host Encoders + + attr_reader :token_stream + + class << self + + # Returns if the Encoder can be used in streaming mode. + def streamable? + is_a? Streamable + end + + # If FILE_EXTENSION isn't defined, this method returns the + # downcase class name instead. + def const_missing sym + if sym == :FILE_EXTENSION + sym.to_s.downcase + else + super + end + end + + end + + # Subclasses are to store their default options in this constant. + DEFAULT_OPTIONS = { :stream => false } + + # The options you gave the Encoder at creating. + attr_accessor :options + + # Creates a new Encoder. + # +options+ is saved and used for all encode operations, as long + # as you don't overwrite it there by passing additional options. + # + # Encoder objects provide three encode methods: + # - encode simply takes a +code+ string and a +lang+ + # - encode_tokens expects a +tokens+ object instead + # - encode_stream is like encode, but uses streaming mode. + # + # Each method has an optional +options+ parameter. These are + # added to the options you passed at creation. + def initialize options = {} + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Encoder class. I can't encode "\ + "anything. :( Use my subclasses." if self.class == Encoder + end + + # Encode a Tokens object. + def encode_tokens tokens, options = {} + options = @options.merge options + setup options + compile tokens, options + finish options + end + + # Encode the given +code+ after tokenizing it using the Scanner + # for +lang+. + def encode code, lang, options = {} + options = @options.merge options + scanner_options = CodeRay.get_scanner_options(options) + tokens = CodeRay.scan code, lang, scanner_options + encode_tokens tokens, options + end + + # You can use highlight instead of encode, if that seems + # more clear to you. + alias highlight encode + + # Encode the given +code+ using the Scanner for +lang+ in + # streaming mode. + def encode_stream code, lang, options = {} + raise NotStreamableError, self unless kind_of? Streamable + options = @options.merge options + setup options + scanner_options = CodeRay.get_scanner_options options + @token_stream = + CodeRay.scan_stream code, lang, scanner_options, &self + finish options + end + + # Behave like a proc. The token method is converted to a proc. + def to_proc + method(:token).to_proc + end + + # Return the default file extension for outputs of this encoder. + def file_extension + self.class::FILE_EXTENSION + end + + protected + + # Called with merged options before encoding starts. + # Sets @out to an empty string. + # + # See the HTML Encoder for an example of option caching. + def setup options + @out = '' + end + + # Called with +text+ and +kind+ of the currently scanned token. + # For simple scanners, it's enougth to implement this method. + # + # By default, it calls text_token or block_token, depending on + # whether +text+ is a String. + def token text, kind + if text.is_a? ::String + text_token text, kind + elsif text.is_a? ::Symbol + block_token text, kind + else + raise 'Unknown token text type: %p' % text + end + end + + def text_token text, kind + end + + def block_token action, kind + case action + when :open + open_token kind + when :close + close_token kind + else + raise 'unknown block action: %p' % action + end + end + + # Called with merged options after encoding starts. + # The return value is the result of encoding, typically @out. + def finish options + @out + end + + # Do the encoding. + # + # The already created +tokens+ object must be used; it can be a + # TokenStream or a Tokens object. + def compile tokens, options + tokens.each(&self) + end + + end + + end +end diff --git a/lib/coderay/encoders/_map.rb b/lib/coderay/encoders/_map.rb index a22a951..fdd8ae4 100644 --- a/lib/coderay/encoders/_map.rb +++ b/lib/coderay/encoders/_map.rb @@ -1,8 +1,8 @@ -module CodeRay -module Encoders - - map :stats => :statistic, - :plain => :text - -end -end +module CodeRay +module Encoders + + map :stats => :statistic, + :plain => :text + +end +end diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb index 6885541..c9a6dfd 100644 --- a/lib/coderay/encoders/count.rb +++ b/lib/coderay/encoders/count.rb @@ -1,21 +1,21 @@ -module CodeRay -module Encoders - - class Count < Encoder - - include Streamable - register_for :count - - protected - - def setup options - @out = 0 - end - - def token text, kind - @out += 1 - end - end - -end -end +module CodeRay +module Encoders + + class Count < Encoder + + include Streamable + register_for :count + + protected + + def setup options + @out = 0 + end + + def token text, kind + @out += 1 + end + end + +end +end diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb index 2639e1f..eb9eaa4 100644 --- a/lib/coderay/encoders/debug.rb +++ b/lib/coderay/encoders/debug.rb @@ -1,46 +1,46 @@ -module CodeRay -module Encoders - - # = Debug Encoder - # - # Fast encoder producing simple debug output. - # - # It is readable and diff-able and is used for testing. - # - # You cannot fully restore the tokens information from the - # output, because consecutive :space tokens are merged. - # Use Tokens#dump for caching purposes. - class Debug < Encoder - - include Streamable - register_for :debug - - FILE_EXTENSION = 'raydebug' - - protected - def text_token text, kind - @out << - if kind == :space - text - else - text = text.gsub(/[)\\]/, '\\\\\0') - "#{kind}(#{text})" - end - end - - def block_token action, kind - @out << super - end - - def open_token kind - "#{kind}<" - end - - def close_token kind - ">" - end - - end - -end -end +module CodeRay +module Encoders + + # = Debug Encoder + # + # Fast encoder producing simple debug output. + # + # It is readable and diff-able and is used for testing. + # + # You cannot fully restore the tokens information from the + # output, because consecutive :space tokens are merged. + # Use Tokens#dump for caching purposes. + class Debug < Encoder + + include Streamable + register_for :debug + + FILE_EXTENSION = 'raydebug' + + protected + def text_token text, kind + @out << + if kind == :space + text + else + text = text.gsub(/[)\\]/, '\\\\\0') + "#{kind}(#{text})" + end + end + + def block_token action, kind + @out << super + end + + def open_token kind + "#{kind}<" + end + + def close_token kind + ">" + end + + end + +end +end diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb index ce558f2..3d55415 100644 --- a/lib/coderay/encoders/div.rb +++ b/lib/coderay/encoders/div.rb @@ -1,20 +1,20 @@ -module CodeRay -module Encoders - - load :html - - class Div < HTML - - FILE_EXTENSION = 'div.html' - - register_for :div - - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :style, - :wrap => :div, - }) - - end - -end -end +module CodeRay +module Encoders + + load :html + + class Div < HTML + + FILE_EXTENSION = 'div.html' + + register_for :div + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :div, + }) + + end + +end +end diff --git a/lib/coderay/encoders/html/classes.rb b/lib/coderay/encoders/html/classes.rb index 8493fa0..ea15ca0 100644 --- a/lib/coderay/encoders/html/classes.rb +++ b/lib/coderay/encoders/html/classes.rb @@ -1,73 +1,73 @@ -module CodeRay -module Encoders - - class HTML - - ClassOfKind = { - :attribute_name => 'an', - :attribute_name_fat => 'af', - :attribute_value => 'av', - :attribute_value_fat => 'aw', - :bin => 'bi', - :char => 'ch', - :class => 'cl', - :class_variable => 'cv', - :color => 'cr', - :comment => 'c', - :constant => 'co', - :content => 'k', - :definition => 'df', - :delimiter => 'dl', - :directive => 'di', - :doc => 'do', - :doc_string => 'ds', - :entity => 'en', - :error => 'er', - :escape => 'e', - :exception => 'ex', - :float => 'fl', - :function => 'fu', - :global_variable => 'gv', - :hex => 'hx', - :include => 'ic', - :inline => 'il', - :instance_variable => 'iv', - :integer => 'i', - :interpreted => 'in', - :label => 'la', - :local_variable => 'lv', - :modifier => 'mod', - :oct => 'oc', - :operator_name => 'on', - :pre_constant => 'pc', - :pre_type => 'pt', - :predefined => 'pd', - :preprocessor => 'pp', - :regexp => 'rx', - :reserved => 'r', - :shell => 'sh', - :string => 's', - :symbol => 'sy', - :tag => 'ta', - :tag_fat => 'tf', - :tag_special => 'ts', - :type => 'ty', - :variable => 'v', - :xml_text => 'xt', - - :ident => :NO_HIGHLIGHT, # 'id' - #:operator => 'op', - :operator => :NO_HIGHLIGHT, # 'op' - :space => :NO_HIGHLIGHT, # 'sp' - :plain => :NO_HIGHLIGHT, - } - ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function] - ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter] - ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter] - ClassOfKind[:escape] = ClassOfKind[:delimiter] - ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!' - - end - -end -end +module CodeRay +module Encoders + + class HTML + + ClassOfKind = { + :attribute_name => 'an', + :attribute_name_fat => 'af', + :attribute_value => 'av', + :attribute_value_fat => 'aw', + :bin => 'bi', + :char => 'ch', + :class => 'cl', + :class_variable => 'cv', + :color => 'cr', + :comment => 'c', + :constant => 'co', + :content => 'k', + :definition => 'df', + :delimiter => 'dl', + :directive => 'di', + :doc => 'do', + :doc_string => 'ds', + :entity => 'en', + :error => 'er', + :escape => 'e', + :exception => 'ex', + :float => 'fl', + :function => 'fu', + :global_variable => 'gv', + :hex => 'hx', + :include => 'ic', + :inline => 'il', + :instance_variable => 'iv', + :integer => 'i', + :interpreted => 'in', + :label => 'la', + :local_variable => 'lv', + :modifier => 'mod', + :oct => 'oc', + :operator_name => 'on', + :pre_constant => 'pc', + :pre_type => 'pt', + :predefined => 'pd', + :preprocessor => 'pp', + :regexp => 'rx', + :reserved => 'r', + :shell => 'sh', + :string => 's', + :symbol => 'sy', + :tag => 'ta', + :tag_fat => 'tf', + :tag_special => 'ts', + :type => 'ty', + :variable => 'v', + :xml_text => 'xt', + + :ident => :NO_HIGHLIGHT, # 'id' + #:operator => 'op', + :operator => :NO_HIGHLIGHT, # 'op' + :space => :NO_HIGHLIGHT, # 'sp' + :plain => :NO_HIGHLIGHT, + } + ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function] + ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter] + ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter] + ClassOfKind[:escape] = ClassOfKind[:delimiter] + ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!' + + end + +end +end diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb index b76d682..d577602 100644 --- a/lib/coderay/encoders/html/css.rb +++ b/lib/coderay/encoders/html/css.rb @@ -1,65 +1,65 @@ -module CodeRay -module Encoders - - class HTML - class CSS - - attr :stylesheet - - def CSS.load_stylesheet style = nil - CodeRay::Styles[style] - end - - def initialize style = :default - @classes = Hash.new - style = CSS.load_stylesheet style - @stylesheet = [ - style::CSS_MAIN_STYLES, - style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ') - ].join("\n") - parse style::TOKEN_COLORS - end - - def [] *styles - cl = @classes[styles.first] - return '' unless cl - style = '' - 1.upto(styles.size) do |offset| - break if style = cl[styles[offset .. -1]] - end - raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? - return style - end - - private - - CSS_CLASS_PATTERN = / - ( (?: # $1 = classes - \s* \. [-\w]+ - )+ ) - \s* \{ \s* - ( [^\}]+ )? # $2 = style - \s* \} \s* - | - ( . ) # $3 = error - /mx - def parse stylesheet - stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| - raise "CSS parse error: '#{error.inspect}' not recognized" if error - styles = classes.scan(/[-\w]+/) - cl = styles.pop - @classes[cl] ||= Hash.new - @classes[cl][styles] = style.to_s.strip - end - end - - end - end - -end -end - -if $0 == __FILE__ - require 'pp' - pp CodeRay::Encoders::HTML::CSS.new -end +module CodeRay +module Encoders + + class HTML + class CSS + + attr :stylesheet + + def CSS.load_stylesheet style = nil + CodeRay::Styles[style] + end + + def initialize style = :default + @classes = Hash.new + style = CSS.load_stylesheet style + @stylesheet = [ + style::CSS_MAIN_STYLES, + style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ') + ].join("\n") + parse style::TOKEN_COLORS + end + + def [] *styles + cl = @classes[styles.first] + return '' unless cl + style = '' + 1.upto(styles.size) do |offset| + break if style = cl[styles[offset .. -1]] + end + raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? + return style + end + + private + + CSS_CLASS_PATTERN = / + ( (?: # $1 = classes + \s* \. [-\w]+ + )+ ) + \s* \{ \s* + ( [^\}]+ )? # $2 = style + \s* \} \s* + | + ( . ) # $3 = error + /mx + def parse stylesheet + stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| + raise "CSS parse error: '#{error.inspect}' not recognized" if error + styles = classes.scan(/[-\w]+/) + cl = styles.pop + @classes[cl] ||= Hash.new + @classes[cl][styles] = style.to_s.strip + end + end + + end + end + +end +end + +if $0 == __FILE__ + require 'pp' + pp CodeRay::Encoders::HTML::CSS.new +end diff --git a/lib/coderay/encoders/html/numerization.rb b/lib/coderay/encoders/html/numerization.rb index 2960f87..1e4a4ed 100644 --- a/lib/coderay/encoders/html/numerization.rb +++ b/lib/coderay/encoders/html/numerization.rb @@ -1,122 +1,122 @@ -module CodeRay -module Encoders - - class HTML - - module Output - - def numerize *args - clone.numerize!(*args) - end - -=begin NUMERIZABLE_WRAPPINGS = { - :table => [:div, :page, nil], - :inline => :all, - :list => [:div, :page, nil] - } - NUMERIZABLE_WRAPPINGS.default = :all -=end - def numerize! mode = :table, options = {} - return self unless mode - - options = DEFAULT_OPTIONS.merge options - - start = options[:line_number_start] - unless start.is_a? Integer - raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start - end - - #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode] - #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap] - # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]] - #end - - bold_every = options[:bold_every] - bolding = - if bold_every == false - proc { |line| line.to_s } - elsif bold_every.is_a? Integer - raise ArgumentError, ":bolding can't be 0." if bold_every == 0 - proc do |line| - if line % bold_every == 0 - "#{line}" # every bold_every-th number in bold - else - line.to_s - end - end - else - raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every - end - - case mode - when :inline - max_width = (start + line_count).to_s.size - line = start - gsub!(/^/) do - line_number = bolding.call line - indent = ' ' * (max_width - line.to_s.size) - res = "#{indent}#{line_number} " - line += 1 - res - end - - when :table - # This is really ugly. - # Because even monospace fonts seem to have different heights when bold, - # I make the newline bold, both in the code and the line numbers. - # FIXME Still not working perfect for Mr. Internet Exploder - # FIXME Firefox struggles with very long codes (> 200 lines) - line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") - line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ - line_numbers.gsub!(/\n/) { "\n" } - - line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) - gsub!(/\n/) { "\n" } - wrap_in! line_numbers_table_tpl - @wrapped_in = :div - - when :list - opened_tags = [] - gsub!(/^.*$\n?/) do |line| - line.chomp! - - open = opened_tags.join - line.scan(%r!<(/)?span[^>]*>?!) do |close,| - if close - opened_tags.pop - else - opened_tags << $& - end - end - close = '' * opened_tags.size - - "
  • #{open}#{line}#{close}
  • " - end - wrap_in! LIST - @wrapped_in = :div - - else - raise ArgumentError, 'Unknown value %p for mode: expected one of %p' % - [mode, [:table, :list, :inline]] - end - - self - end - - def line_count - line_count = count("\n") - position_of_last_newline = rindex(?\n) - if position_of_last_newline - after_last_newline = self[position_of_last_newline + 1 .. -1] - ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/] - line_count += 1 if not ends_with_newline - end - line_count - end - - end - - end - -end -end +module CodeRay +module Encoders + + class HTML + + module Output + + def numerize *args + clone.numerize!(*args) + end + +=begin NUMERIZABLE_WRAPPINGS = { + :table => [:div, :page, nil], + :inline => :all, + :list => [:div, :page, nil] + } + NUMERIZABLE_WRAPPINGS.default = :all +=end + def numerize! mode = :table, options = {} + return self unless mode + + options = DEFAULT_OPTIONS.merge options + + start = options[:line_number_start] + unless start.is_a? Integer + raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start + end + + #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode] + #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap] + # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]] + #end + + bold_every = options[:bold_every] + bolding = + if bold_every == false + proc { |line| line.to_s } + elsif bold_every.is_a? Integer + raise ArgumentError, ":bolding can't be 0." if bold_every == 0 + proc do |line| + if line % bold_every == 0 + "#{line}" # every bold_every-th number in bold + else + line.to_s + end + end + else + raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every + end + + case mode + when :inline + max_width = (start + line_count).to_s.size + line = start + gsub!(/^/) do + line_number = bolding.call line + indent = ' ' * (max_width - line.to_s.size) + res = "#{indent}#{line_number} " + line += 1 + res + end + + when :table + # This is really ugly. + # Because even monospace fonts seem to have different heights when bold, + # I make the newline bold, both in the code and the line numbers. + # FIXME Still not working perfect for Mr. Internet Exploder + # FIXME Firefox struggles with very long codes (> 200 lines) + line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") + line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ + line_numbers.gsub!(/\n/) { "\n" } + + line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) + gsub!(/\n/) { "\n" } + wrap_in! line_numbers_table_tpl + @wrapped_in = :div + + when :list + opened_tags = [] + gsub!(/^.*$\n?/) do |line| + line.chomp! + + open = opened_tags.join + line.scan(%r!<(/)?span[^>]*>?!) do |close,| + if close + opened_tags.pop + else + opened_tags << $& + end + end + close = '' * opened_tags.size + + "
  • #{open}#{line}#{close}
  • " + end + wrap_in! LIST + @wrapped_in = :div + + else + raise ArgumentError, 'Unknown value %p for mode: expected one of %p' % + [mode, [:table, :list, :inline]] + end + + self + end + + def line_count + line_count = count("\n") + position_of_last_newline = rindex(?\n) + if position_of_last_newline + after_last_newline = self[position_of_last_newline + 1 .. -1] + ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/] + line_count += 1 if not ends_with_newline + end + line_count + end + + end + + end + +end +end diff --git a/lib/coderay/encoders/html/output.rb b/lib/coderay/encoders/html/output.rb index 61258ee..e74e55e 100644 --- a/lib/coderay/encoders/html/output.rb +++ b/lib/coderay/encoders/html/output.rb @@ -1,195 +1,195 @@ -module CodeRay -module Encoders - - class HTML - - # This module is included in the output String from thew HTML Encoder. - # - # It provides methods like wrap, div, page etc. - # - # Remember to use #clone instead of #dup to keep the modules the object was - # extended with. - # - # TODO: more doc. - module Output - - require 'coderay/encoders/html/numerization.rb' - - attr_accessor :css - - class << self - - # This makes Output look like a class. - # - # Example: - # - # a = Output.new 'Code' - # a.wrap! :page - def new string, css = CSS.new, element = nil - output = string.clone.extend self - output.wrapped_in = element - output.css = css - output - end - - # Raises an exception if an object that doesn't respond to to_str is extended by Output, - # to prevent users from misuse. Use Module#remove_method to disable. - def extended o - warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str - end - - def make_stylesheet css, in_tag = false - sheet = css.stylesheet - sheet = <<-CSS if in_tag - - CSS - sheet - end - - def page_template_for_css css - sheet = make_stylesheet css - PAGE.apply 'CSS', sheet - end - - # Define a new wrapper. This is meta programming. - def wrapper *wrappers - wrappers.each do |wrapper| - define_method wrapper do |*args| - wrap wrapper, *args - end - define_method "#{wrapper}!".to_sym do |*args| - wrap! wrapper, *args - end - end - end - - end - - wrapper :div, :span, :page - - def wrapped_in? element - wrapped_in == element - end - - def wrapped_in - @wrapped_in ||= nil - end - attr_writer :wrapped_in - - def wrap_in template - clone.wrap_in! template - end - - def wrap_in! template - Template.wrap! self, template, 'CONTENT' - self - end - - def wrap! element, *args - return self if not element or element == wrapped_in - case element - when :div - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil - wrap_in! DIV - when :span - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil - wrap_in! SPAN - when :page - wrap! :div if wrapped_in? nil - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div - wrap_in! Output.page_template_for_css(@css) - when nil - return self - else - raise "Unknown value %p for :wrap" % element - end - @wrapped_in = element - self - end - - def wrap *args - clone.wrap!(*args) - end - - def stylesheet in_tag = false - Output.make_stylesheet @css, in_tag - end - - class Template < String - - def self.wrap! str, template, target - target = Regexp.new(Regexp.escape("<%#{target}%>")) - if template =~ target - str[0,0] = $` - str << $' - else - raise "Template target <%%%p%%> not found" % target - end - end - - def apply target, replacement - target = Regexp.new(Regexp.escape("<%#{target}%>")) - if self =~ target - Template.new($` + replacement + $') - else - raise "Template target <%%%p%%> not found" % target - end - end - - module Simple - def ` str #` <-- for stupid editors - Template.new str - end - end - end - - extend Template::Simple - -#-- don't include the templates in docu - - SPAN = `<%CONTENT%>` - - DIV = <<-`DIV` -
    -
    <%CONTENT%>
    -
    - DIV - - TABLE = <<-`TABLE` - - - -
    <%LINE_NUMBERS%>
    <%CONTENT%>
    - TABLE - # title="double click to expand" - - LIST = <<-`LIST` -
      <%CONTENT%>
    - LIST - - PAGE = <<-`PAGE` - - - - - CodeRay HTML Encoder Example - - - - -<%CONTENT%> - - - PAGE - - end - - end - -end -end +module CodeRay +module Encoders + + class HTML + + # This module is included in the output String from thew HTML Encoder. + # + # It provides methods like wrap, div, page etc. + # + # Remember to use #clone instead of #dup to keep the modules the object was + # extended with. + # + # TODO: more doc. + module Output + + require 'coderay/encoders/html/numerization.rb' + + attr_accessor :css + + class << self + + # This makes Output look like a class. + # + # Example: + # + # a = Output.new 'Code' + # a.wrap! :page + def new string, css = CSS.new, element = nil + output = string.clone.extend self + output.wrapped_in = element + output.css = css + output + end + + # Raises an exception if an object that doesn't respond to to_str is extended by Output, + # to prevent users from misuse. Use Module#remove_method to disable. + def extended o + warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str + end + + def make_stylesheet css, in_tag = false + sheet = css.stylesheet + sheet = <<-CSS if in_tag + + CSS + sheet + end + + def page_template_for_css css + sheet = make_stylesheet css + PAGE.apply 'CSS', sheet + end + + # Define a new wrapper. This is meta programming. + def wrapper *wrappers + wrappers.each do |wrapper| + define_method wrapper do |*args| + wrap wrapper, *args + end + define_method "#{wrapper}!".to_sym do |*args| + wrap! wrapper, *args + end + end + end + + end + + wrapper :div, :span, :page + + def wrapped_in? element + wrapped_in == element + end + + def wrapped_in + @wrapped_in ||= nil + end + attr_writer :wrapped_in + + def wrap_in template + clone.wrap_in! template + end + + def wrap_in! template + Template.wrap! self, template, 'CONTENT' + self + end + + def wrap! element, *args + return self if not element or element == wrapped_in + case element + when :div + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! DIV + when :span + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! SPAN + when :page + wrap! :div if wrapped_in? nil + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div + wrap_in! Output.page_template_for_css(@css) + when nil + return self + else + raise "Unknown value %p for :wrap" % element + end + @wrapped_in = element + self + end + + def wrap *args + clone.wrap!(*args) + end + + def stylesheet in_tag = false + Output.make_stylesheet @css, in_tag + end + + class Template < String + + def self.wrap! str, template, target + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if template =~ target + str[0,0] = $` + str << $' + else + raise "Template target <%%%p%%> not found" % target + end + end + + def apply target, replacement + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if self =~ target + Template.new($` + replacement + $') + else + raise "Template target <%%%p%%> not found" % target + end + end + + module Simple + def ` str #` <-- for stupid editors + Template.new str + end + end + end + + extend Template::Simple + +#-- don't include the templates in docu + + SPAN = `<%CONTENT%>` + + DIV = <<-`DIV` +
    +
    <%CONTENT%>
    +
    + DIV + + TABLE = <<-`TABLE` + + + +
    <%LINE_NUMBERS%>
    <%CONTENT%>
    + TABLE + # title="double click to expand" + + LIST = <<-`LIST` +
      <%CONTENT%>
    + LIST + + PAGE = <<-`PAGE` + + + + + CodeRay HTML Encoder Example + + + + +<%CONTENT%> + + + PAGE + + end + + end + +end +end diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb index 96d81fe..add3862 100644 --- a/lib/coderay/encoders/null.rb +++ b/lib/coderay/encoders/null.rb @@ -1,26 +1,26 @@ -module CodeRay -module Encoders - - # = Null Encoder - # - # Does nothing and returns an empty string. - class Null < Encoder - - include Streamable - register_for :null - - # Defined for faster processing - def to_proc - proc {} - end - - protected - - def token(*) - # do nothing - end - - end - -end -end +module CodeRay +module Encoders + + # = Null Encoder + # + # Does nothing and returns an empty string. + class Null < Encoder + + include Streamable + register_for :null + + # Defined for faster processing + def to_proc + proc {} + end + + protected + + def token(*) + # do nothing + end + + end + +end +end diff --git a/lib/coderay/encoders/page.rb b/lib/coderay/encoders/page.rb index 1ed7985..c08f094 100644 --- a/lib/coderay/encoders/page.rb +++ b/lib/coderay/encoders/page.rb @@ -1,21 +1,21 @@ -module CodeRay -module Encoders - - load :html - - class Page < HTML - - FILE_EXTENSION = 'html' - - register_for :page - - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :class, - :wrap => :page, - :line_numbers => :table - }) - - end - -end -end +module CodeRay +module Encoders + + load :html + + class Page < HTML + + FILE_EXTENSION = 'html' + + register_for :page + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :class, + :wrap => :page, + :line_numbers => :table + }) + + end + +end +end diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb index e892cb2..988afec 100644 --- a/lib/coderay/encoders/span.rb +++ b/lib/coderay/encoders/span.rb @@ -1,20 +1,20 @@ -module CodeRay -module Encoders - - load :html - - class Span < HTML - - FILE_EXTENSION = 'span.html' - - register_for :span - - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :style, - :wrap => :span, - }) - - end - -end -end +module CodeRay +module Encoders + + load :html + + class Span < HTML + + FILE_EXTENSION = 'span.html' + + register_for :span + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :span, + }) + + end + +end +end diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index f80d5c8..e2a0460 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -1,81 +1,81 @@ -module CodeRay -module Encoders - - # Makes a statistic for the given tokens. - class Statistic < Encoder - - include Streamable - register_for :stats, :statistic - - attr_reader :type_stats, :real_token_count - - protected - - TypeStats = Struct.new :count, :size - - def setup options - @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 } - @real_token_count = 0 - end - - def generate tokens, options - @tokens = tokens - super - end - - def text_token text, kind - @real_token_count += 1 unless kind == :space - @type_stats[kind].count += 1 - @type_stats[kind].size += text.size - @type_stats['TOTAL'].size += text.size - end - - # TODO Hierarchy handling - def block_token action, kind - #@content_type = kind - @type_stats['open/close'].count += 1 - end - - def token text, kind - super - @type_stats['TOTAL'].count += 1 - end - - STATS = <<-STATS - -Code Statistics - -Tokens %8d - Non-Whitespace %8d -Bytes Total %8d - -Token Types (%d): - type count ratio size (average) -------------------------------------------------------------- -%s - STATS -# space 12007 33.81 % 1.7 - TOKEN_TYPES_ROW = <<-TKR - %-20s %8d %6.2f %% %5.1f - TKR - - def finish options - all = @type_stats['TOTAL'] - all_count, all_size = all.count, all.size - @type_stats.each do |type, stat| - stat.size /= stat.count.to_f - end - types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v| - TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size] - end.join - STATS % [ - all_count, @real_token_count, all_size, - @type_stats.delete_if { |k, v| k.is_a? String }.size, - types_stats - ] - end - - end - -end -end +module CodeRay +module Encoders + + # Makes a statistic for the given tokens. + class Statistic < Encoder + + include Streamable + register_for :stats, :statistic + + attr_reader :type_stats, :real_token_count + + protected + + TypeStats = Struct.new :count, :size + + def setup options + @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 } + @real_token_count = 0 + end + + def generate tokens, options + @tokens = tokens + super + end + + def text_token text, kind + @real_token_count += 1 unless kind == :space + @type_stats[kind].count += 1 + @type_stats[kind].size += text.size + @type_stats['TOTAL'].size += text.size + end + + # TODO Hierarchy handling + def block_token action, kind + #@content_type = kind + @type_stats['open/close'].count += 1 + end + + def token text, kind + super + @type_stats['TOTAL'].count += 1 + end + + STATS = <<-STATS + +Code Statistics + +Tokens %8d + Non-Whitespace %8d +Bytes Total %8d + +Token Types (%d): + type count ratio size (average) +------------------------------------------------------------- +%s + STATS +# space 12007 33.81 % 1.7 + TOKEN_TYPES_ROW = <<-TKR + %-20s %8d %6.2f %% %5.1f + TKR + + def finish options + all = @type_stats['TOTAL'] + all_count, all_size = all.count, all.size + @type_stats.each do |type, stat| + stat.size /= stat.count.to_f + end + types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v| + TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size] + end.join + STATS % [ + all_count, @real_token_count, all_size, + @type_stats.delete_if { |k, v| k.is_a? String }.size, + types_stats + ] + end + + end + +end +end diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb index 31661ef..17256c6 100644 --- a/lib/coderay/encoders/text.rb +++ b/lib/coderay/encoders/text.rb @@ -1,33 +1,33 @@ -module CodeRay -module Encoders - - class Text < Encoder - - include Streamable - register_for :text - - FILE_EXTENSION = 'txt' - - DEFAULT_OPTIONS = { - :separator => '' - } - - protected - def setup options - super - @sep = options[:separator] - end - - def token text, kind - return unless text.respond_to? :to_str - @out << text + @sep - end - - def finish options - @out.chomp @sep - end - - end - -end -end +module CodeRay +module Encoders + + class Text < Encoder + + include Streamable + register_for :text + + FILE_EXTENSION = 'txt' + + DEFAULT_OPTIONS = { + :separator => '' + } + + protected + def setup options + super + @sep = options[:separator] + end + + def token text, kind + return unless text.respond_to? :to_str + @out << text + @sep + end + + def finish options + @out.chomp @sep + end + + end + +end +end diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb index 743cc0e..2428589 100644 --- a/lib/coderay/encoders/tokens.rb +++ b/lib/coderay/encoders/tokens.rb @@ -1,44 +1,44 @@ -module CodeRay -module Encoders - - # The Tokens encoder converts the tokens to a simple - # readable format. It doesn't use colors and is mainly - # intended for console output. - # - # The tokens are converted with Tokens.write_token. - # - # The format is: - # - # \t \n - # - # Example: - # - # require 'coderay' - # puts CodeRay.scan("puts 3 + 4", :ruby).tokens - # - # prints: - # - # ident puts - # space - # integer 3 - # space - # operator + - # space - # integer 4 - # - class Tokens < Encoder - - include Streamable - register_for :tokens - - FILE_EXTENSION = 'tok' - - protected - def token *args - @out << CodeRay::Tokens.write_token(*args) - end - - end - -end -end +module CodeRay +module Encoders + + # The Tokens encoder converts the tokens to a simple + # readable format. It doesn't use colors and is mainly + # intended for console output. + # + # The tokens are converted with Tokens.write_token. + # + # The format is: + # + # \t \n + # + # Example: + # + # require 'coderay' + # puts CodeRay.scan("puts 3 + 4", :ruby).tokens + # + # prints: + # + # ident puts + # space + # integer 3 + # space + # operator + + # space + # integer 4 + # + class Tokens < Encoder + + include Streamable + register_for :tokens + + FILE_EXTENSION = 'tok' + + protected + def token *args + @out << CodeRay::Tokens.write_token(*args) + end + + end + +end +end diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb index 21ef0cf..09e4549 100644 --- a/lib/coderay/encoders/xml.rb +++ b/lib/coderay/encoders/xml.rb @@ -1,71 +1,71 @@ -module CodeRay -module Encoders - - # = XML Encoder - # - # Uses REXML. Very slow. - class XML < Encoder - - include Streamable - register_for :xml - - FILE_EXTENSION = 'xml' - - require 'rexml/document' - - DEFAULT_OPTIONS = { - :tab_width => 8, - :pretty => -1, - :transitive => false, - } - - protected - - def setup options - @out = '' - @doc = REXML::Document.new - @doc << REXML::XMLDecl.new - @tab_width = options[:tab_width] - @root = @node = @doc.add_element('coderay-tokens') - end - - def finish options - @doc.write @out, options[:pretty], options[:transitive], true - @out - end - - def text_token text, kind - if kind == :space - token = @node - else - token = @node.add_element kind.to_s - end - text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| - case - when space - token << REXML::Text.new(space, true) - when tab - token << REXML::Text.new(tab, true) - when nl - token << REXML::Text.new(nl, true) - else - token << REXML::Text.new($&) - end - end - end - - def open_token kind - @node = @node.add_element kind.to_s - end - - def close_token kind - if @node == @root - raise 'no token to close!' - end - @node = @node.parent - end - - end - -end -end +module CodeRay +module Encoders + + # = XML Encoder + # + # Uses REXML. Very slow. + class XML < Encoder + + include Streamable + register_for :xml + + FILE_EXTENSION = 'xml' + + require 'rexml/document' + + DEFAULT_OPTIONS = { + :tab_width => 8, + :pretty => -1, + :transitive => false, + } + + protected + + def setup options + @out = '' + @doc = REXML::Document.new + @doc << REXML::XMLDecl.new + @tab_width = options[:tab_width] + @root = @node = @doc.add_element('coderay-tokens') + end + + def finish options + @doc.write @out, options[:pretty], options[:transitive], true + @out + end + + def text_token text, kind + if kind == :space + token = @node + else + token = @node.add_element kind.to_s + end + text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| + case + when space + token << REXML::Text.new(space, true) + when tab + token << REXML::Text.new(tab, true) + when nl + token << REXML::Text.new(nl, true) + else + token << REXML::Text.new($&) + end + end + end + + def open_token kind + @node = @node.add_element kind.to_s + end + + def close_token kind + if @node == @root + raise 'no token to close!' + end + @node = @node.parent + end + + end + +end +end diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb index 47f64a4..5564e58 100644 --- a/lib/coderay/encoders/yaml.rb +++ b/lib/coderay/encoders/yaml.rb @@ -1,22 +1,22 @@ -module CodeRay -module Encoders - - # = YAML Encoder - # - # Slow. - class YAML < Encoder - - register_for :yaml - - FILE_EXTENSION = 'yaml' - - protected - def compile tokens, options - require 'yaml' - @out = tokens.to_a.to_yaml - end - - end - -end -end +module CodeRay +module Encoders + + # = YAML Encoder + # + # Slow. + class YAML < Encoder + + register_for :yaml + + FILE_EXTENSION = 'yaml' + + protected + def compile tokens, options + require 'yaml' + @out = tokens.to_a.to_yaml + end + + end + +end +end diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 9007646..432745f 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ -module CodeRay -module Scanners - - class Plaintext < Scanner - - register_for :plaintext, :plain - - def scan_tokens tokens, options - tokens << [scan_until(/\z/), :plain] - end - - end - -end -end +module CodeRay +module Scanners + + class Plaintext < Scanner + + register_for :plaintext, :plain + + def scan_tokens tokens, options + tokens << [scan_until(/\z/), :plain] + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index b1e0d1b..c601011 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -1,216 +1,216 @@ -module CodeRay -module Scanners - - module Ruby::Patterns # :nodoc: - - RESERVED_WORDS = %w[ - and def end in or unless begin - defined? ensure module redo super until - BEGIN break do next rescue then - when END case else for retry - while alias class elsif if not return - undef yield - ] - - DEF_KEYWORDS = %w[ def ] - UNDEF_KEYWORDS = %w[ undef ] - MODULE_KEYWORDS = %w[class module] - DEF_NEW_STATE = WordList.new(:initial). - add(DEF_KEYWORDS, :def_expected). - add(UNDEF_KEYWORDS, :undef_expected). - add(MODULE_KEYWORDS, :module_expected) - - IDENTS_ALLOWING_REGEXP = %w[ - and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split - ] - REGEXP_ALLOWED = WordList.new(false). - add(IDENTS_ALLOWING_REGEXP, :set) - - PREDEFINED_CONSTANTS = %w[ - nil true false self - DATA ARGV ARGF __FILE__ __LINE__ - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_CONSTANTS, :pre_constant) - - IDENT = /[a-z_][\w_]*/i - - METHOD_NAME = / #{IDENT} [?!]? /ox - METHOD_NAME_OPERATOR = / - \*\*? # multiplication and power - | [-+]@? # plus, minus - | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde - | \[\]=? # array getter and setter - | << | >> # append or shift left, shift right - | <=?>? | >=? # comparison, rocket operator - | ===? # simple equality and case equality - /ox - METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox - INSTANCE_VARIABLE = / @ #{IDENT} /ox - CLASS_VARIABLE = / @@ #{IDENT} /ox - OBJECT_VARIABLE = / @@? #{IDENT} /ox - GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox - PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox - VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox - - QUOTE_TO_TYPE = { - '`' => :shell, - '/'=> :regexp, - } - QUOTE_TO_TYPE.default = :string - - REGEXP_MODIFIERS = /[mixounse]*/ - REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ - - DECIMAL = /\d+(?:_\d+)*/ - OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ - HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ - BINARY = /0b[01]+(?:_[01]+)*/ - - EXPONENT = / [eE] [+-]? #{DECIMAL} /ox - FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox - FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox - NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox - - SYMBOL = / - : - (?: - #{METHOD_NAME_EX} - | #{PREFIX_VARIABLE} - | ['"] - ) - /ox - - # TODO investigste \M, \c and \C escape sequences - # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) - # assert_equal(225, ?\M-a) - # assert_equal(129, ?\M-\C-a) - ESCAPE = / - [abefnrstv] - | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- - | [0-7]{1,3} - | x[0-9A-Fa-f]{1,2} - | . - /mx - - CHARACTER = / - \? - (?: - [^\s\\] - | \\ #{ESCAPE} - ) - /mx - - # NOTE: This is not completely correct, but - # nobody needs heredoc delimiters ending with \n. - HEREDOC_OPEN = / - << (-)? # $1 = float - (?: - ( [A-Za-z_0-9]+ ) # $2 = delim - | - ( ["'`] ) # $3 = quote, type - ( [^\n]*? ) \3 # $4 = delim - ) - /mx - - RUBYDOC = / - =begin (?!\S) - .*? - (?: \Z | ^=end (?!\S) [^\n]* ) - /mx - - DATA = / - __END__$ - .*? - (?: \Z | (?=^\#CODE) ) - /mx - - RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo - - RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x - - # FIXME: \s and = are only a workaround, they are still allowed - # as delimiters. - FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx - FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx - - FancyStringType = { - 'q' => [:string, false], - 'Q' => [:string, true], - 'r' => [:regexp, true], - 's' => [:symbol, false], - 'x' => [:shell, true] - } - FancyStringType['w'] = FancyStringType['q'] - FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] - - class StringState < Struct.new :type, :interpreted, :delim, :heredoc, - :paren, :paren_depth, :pattern, :next_state - - CLOSING_PAREN = Hash[ *%w[ - ( ) - [ ] - < > - { } - ] ] - - CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << - OPENING_PAREN = CLOSING_PAREN.invert - - STRING_PATTERN = Hash.new { |h, k| - delim, interpreted = *k - delim_pattern = Regexp.escape(delim.dup) - if closing_paren = CLOSING_PAREN[delim] - delim_pattern << Regexp.escape(closing_paren) - end - - - special_escapes = - case interpreted - when :regexp_symbols - '| ' + REGEXP_SYMBOLS.source - when :words - '| \s' - end - - h[k] = - if interpreted and not delim == '#' - / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx - else - / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx - end - } - - HEREDOC_PATTERN = Hash.new { |h, k| - delim, interpreted, indented = *k - delim_pattern = Regexp.escape(delim.dup) - delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x - h[k] = - if interpreted - / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc - else - / (?= #{delim_pattern}() | \\ ) /mx - end - } - - def initialize kind, interpreted, delim, heredoc = false - if heredoc - pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] - delim = nil - else - pattern = STRING_PATTERN[ [delim, interpreted] ] - if paren = CLOSING_PAREN[delim] - delim, paren = paren, delim - paren_depth = 1 - end - end - super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial - end - end unless defined? StringState - - end - -end -end +module CodeRay +module Scanners + + module Ruby::Patterns # :nodoc: + + RESERVED_WORDS = %w[ + and def end in or unless begin + defined? ensure module redo super until + BEGIN break do next rescue then + when END case else for retry + while alias class elsif if not return + undef yield + ] + + DEF_KEYWORDS = %w[ def ] + UNDEF_KEYWORDS = %w[ undef ] + MODULE_KEYWORDS = %w[class module] + DEF_NEW_STATE = WordList.new(:initial). + add(DEF_KEYWORDS, :def_expected). + add(UNDEF_KEYWORDS, :undef_expected). + add(MODULE_KEYWORDS, :module_expected) + + IDENTS_ALLOWING_REGEXP = %w[ + and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split + ] + REGEXP_ALLOWED = WordList.new(false). + add(IDENTS_ALLOWING_REGEXP, :set) + + PREDEFINED_CONSTANTS = %w[ + nil true false self + DATA ARGV ARGF __FILE__ __LINE__ + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :pre_constant) + + IDENT = /[a-z_][\w_]*/i + + METHOD_NAME = / #{IDENT} [?!]? /ox + METHOD_NAME_OPERATOR = / + \*\*? # multiplication and power + | [-+]@? # plus, minus + | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde + | \[\]=? # array getter and setter + | << | >> # append or shift left, shift right + | <=?>? | >=? # comparison, rocket operator + | ===? # simple equality and case equality + /ox + METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox + INSTANCE_VARIABLE = / @ #{IDENT} /ox + CLASS_VARIABLE = / @@ #{IDENT} /ox + OBJECT_VARIABLE = / @@? #{IDENT} /ox + GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox + PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox + VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox + + QUOTE_TO_TYPE = { + '`' => :shell, + '/'=> :regexp, + } + QUOTE_TO_TYPE.default = :string + + REGEXP_MODIFIERS = /[mixounse]*/ + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ + + DECIMAL = /\d+(?:_\d+)*/ + OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ + HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ + BINARY = /0b[01]+(?:_[01]+)*/ + + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox + + SYMBOL = / + : + (?: + #{METHOD_NAME_EX} + | #{PREFIX_VARIABLE} + | ['"] + ) + /ox + + # TODO investigste \M, \c and \C escape sequences + # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) + # assert_equal(225, ?\M-a) + # assert_equal(129, ?\M-\C-a) + ESCAPE = / + [abefnrstv] + | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- + | [0-7]{1,3} + | x[0-9A-Fa-f]{1,2} + | . + /mx + + CHARACTER = / + \? + (?: + [^\s\\] + | \\ #{ESCAPE} + ) + /mx + + # NOTE: This is not completely correct, but + # nobody needs heredoc delimiters ending with \n. + HEREDOC_OPEN = / + << (-)? # $1 = float + (?: + ( [A-Za-z_0-9]+ ) # $2 = delim + | + ( ["'`] ) # $3 = quote, type + ( [^\n]*? ) \3 # $4 = delim + ) + /mx + + RUBYDOC = / + =begin (?!\S) + .*? + (?: \Z | ^=end (?!\S) [^\n]* ) + /mx + + DATA = / + __END__$ + .*? + (?: \Z | (?=^\#CODE) ) + /mx + + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x + + # FIXME: \s and = are only a workaround, they are still allowed + # as delimiters. + FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx + FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx + + FancyStringType = { + 'q' => [:string, false], + 'Q' => [:string, true], + 'r' => [:regexp, true], + 's' => [:symbol, false], + 'x' => [:shell, true] + } + FancyStringType['w'] = FancyStringType['q'] + FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] + + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, + :paren, :paren_depth, :pattern, :next_state + + CLOSING_PAREN = Hash[ *%w[ + ( ) + [ ] + < > + { } + ] ] + + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << + OPENING_PAREN = CLOSING_PAREN.invert + + STRING_PATTERN = Hash.new { |h, k| + delim, interpreted = *k + delim_pattern = Regexp.escape(delim.dup) + if closing_paren = CLOSING_PAREN[delim] + delim_pattern << Regexp.escape(closing_paren) + end + + + special_escapes = + case interpreted + when :regexp_symbols + '| ' + REGEXP_SYMBOLS.source + when :words + '| \s' + end + + h[k] = + if interpreted and not delim == '#' + / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx + else + / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx + end + } + + HEREDOC_PATTERN = Hash.new { |h, k| + delim, interpreted, indented = *k + delim_pattern = Regexp.escape(delim.dup) + delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x + h[k] = + if interpreted + / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc + else + / (?= #{delim_pattern}() | \\ ) /mx + end + } + + def initialize kind, interpreted, delim, heredoc = false + if heredoc + pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] + delim = nil + else + pattern = STRING_PATTERN[ [delim, interpreted] ] + if paren = CLOSING_PAREN[delim] + delim, paren = paren, delim + paren_depth = 1 + end + end + super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial + end + end unless defined? StringState + + end + +end +end diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index 5ce8ce9..ff923fb 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ -module CodeRay -module Scanners - - load :html - - # XML Scanner - # - # $Id$ - # - # Currently this is the same scanner as Scanners::HTML. - class XML < HTML - - register_for :xml - - end - -end -end +module CodeRay +module Scanners + + load :html + + # XML Scanner + # + # $Id$ + # + # Currently this is the same scanner as Scanners::HTML. + class XML < HTML + + register_for :xml + + end + +end +end diff --git a/lib/coderay/style.rb b/lib/coderay/style.rb index 057f8d4..c2977c5 100644 --- a/lib/coderay/style.rb +++ b/lib/coderay/style.rb @@ -1,20 +1,20 @@ -module CodeRay - - # This module holds the Style class and its subclasses. - # - # See Plugin. - module Styles - extend PluginHost - plugin_path File.dirname(__FILE__), 'styles' - - class Style - extend Plugin - plugin_host Styles - - DEFAULT_OPTIONS = { } - - end - - end - -end +module CodeRay + + # This module holds the Style class and its subclasses. + # + # See Plugin. + module Styles + extend PluginHost + plugin_path File.dirname(__FILE__), 'styles' + + class Style + extend Plugin + plugin_host Styles + + DEFAULT_OPTIONS = { } + + end + + end + +end diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index 8b8c692..c8c62e0 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -1,322 +1,322 @@ -module CodeRay - - # = Tokens - # - # The Tokens class represents a list of tokens returnd from - # a Scanner. - # - # A token is not a special object, just a two-element Array - # consisting of - # * the _token_ _kind_ (a Symbol representing the type of the token) - # * the _token_ _text_ (the original source of the token in a String) - # - # A token looks like this: - # - # [:comment, '# It looks like this'] - # [:float, '3.1415926'] - # [:error, 'äöü'] - # - # Some scanners also yield some kind of sub-tokens, represented by special - # token texts, namely :open and :close . - # - # The Ruby scanner, for example, splits "a string" into: - # - # [ - # [:open, :string], - # [:delimiter, '"'], - # [:content, 'a string'], - # [:delimiter, '"'], - # [:close, :string] - # ] - # - # Tokens is also the interface between Scanners and Encoders: - # The input is split and saved into a Tokens object. The Encoder - # then builds the output from this object. - # - # Thus, the syntax below becomes clear: - # - # CodeRay.scan('price = 2.59', :ruby).html - # # the Tokens object is here -------^ - # - # See how small it is? ;) - # - # Tokens gives you the power to handle pre-scanned code very easily: - # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string - # that you put in your DB. - # - # Tokens' subclass TokenStream allows streaming to save memory. - class Tokens < Array - - class << self - - # Convert the token to a string. - # - # This format is used by Encoders.Tokens. - # It can be reverted using read_token. - def write_token text, type - if text.is_a? String - "#{type}\t#{escape(text)}\n" - else - ":#{text}\t#{type}\t\n" - end - end - - # Read a token from the string. - # - # Inversion of write_token. - # - # TODO Test this! - def read_token token - type, text = token.split("\t", 2) - if type[0] == ?: - [text.to_sym, type[1..-1].to_sym] - else - [type.to_sym, unescape(text)] - end - end - - # Escapes a string for use in write_token. - def escape text - text.gsub(/[\n\\]/, '\\\\\&') - end - - # Unescapes a string created by escape. - def unescape text - text.gsub(/\\[\n\\]/) { |m| m[1,1] } - end - - end - - # Whether the object is a TokenStream. - # - # Returns false. - def stream? - false - end - - # Iterates over all tokens. - # - # If a filter is given, only tokens of that kind are yielded. - def each kind_filter = nil, &block - unless kind_filter - super(&block) - else - super() do |text, kind| - next unless kind == kind_filter - yield text, kind - end - end - end - - # Iterates over all text tokens. - # Range tokens like [:open, :string] are left out. - # - # Example: - # tokens.each_text_token { |text, kind| text.replace html_escape(text) } - def each_text_token - each do |text, kind| - next unless text.respond_to? :to_str - yield text, kind - end - end - - # Encode the tokens using encoder. - # - # encoder can be - # * a symbol like :html oder :statistic - # * an Encoder class - # * an Encoder object - # - # options are passed to the encoder. - def encode encoder, options = {} - unless encoder.is_a? Encoders::Encoder - unless encoder.is_a? Class - encoder_class = Encoders[encoder] - end - encoder = encoder_class.new options - end - encoder.encode_tokens self, options - end - - - # Turn into a string using Encoders::Text. - # - # +options+ are passed to the encoder if given. - def to_s options = {} - encode :text, options - end - - - # Redirects unknown methods to encoder calls. - # - # For example, if you call +tokens.html+, the HTML encoder - # is used to highlight the tokens. - def method_missing meth, options = {} - Encoders[meth].new(options).encode_tokens self - end - - # Returns the tokens compressed by joining consecutive - # tokens of the same kind. - # - # This can not be undone, but should yield the same output - # in most Encoders. It basically makes the output smaller. - # - # Combined with dump, it saves space for the cost of time. - # - # If the scanner is written carefully, this is not required - - # for example, consecutive //-comment lines could already be - # joined in one comment token by the Scanner. - def optimize - print ' Tokens#optimize: before: %d - ' % size if $DEBUG - last_kind = last_text = nil - new = self.class.new - each do |text, kind| - if text.is_a? String - if kind == last_kind - last_text << text - else - new << [last_text, last_kind] if last_kind - last_text = text - last_kind = kind - end - else - new << [last_text, last_kind] if last_kind - last_kind = last_text = nil - new << [text, kind] - end - end - new << [last_text, last_kind] if last_kind - print 'after: %d (%d saved = %2.0f%%)' % - [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG - new - end - - # Compact the object itself; see optimize. - def optimize! - replace optimize - end - - # Dumps the object into a String that can be saved - # in files or databases. - # - # The dump is created with Marshal.dump; - # In addition, it is gzipped using GZip.gzip. - # - # The returned String object includes Undumping - # so it has an #undump method. See Tokens.load. - # - # You can configure the level of compression, - # but the default value 7 should be what you want - # in most cases as it is a good comprimise between - # speed and compression rate. - # - # See GZip module. - def dump gzip_level = 7 - require 'coderay/helpers/gzip_simple' - dump = Marshal.dump self - dump = dump.gzip gzip_level - dump.extend Undumping - end - - # The total size of the tokens. - # Should be equal to the input size before - # scanning. - def text_size - map { |t, k| t }.join.size - end - - # Include this module to give an object an #undump - # method. - # - # The string returned by Tokens.dump includes Undumping. - module Undumping - # Calls Tokens.load with itself. - def undump - Tokens.load self - end - end - - # Undump the object using Marshal.load, then - # unzip it using GZip.gunzip. - # - # The result is commonly a Tokens object, but - # this is not guaranteed. - def Tokens.load dump - require 'coderay/helpers/gzip_simple' - dump = dump.gunzip - @dump = Marshal.load dump - end - - end - - - # = TokenStream - # - # The TokenStream class is a fake Array without elements. - # - # It redirects the method << to a block given at creation. - # - # This allows scanners and Encoders to use streaming (no - # tokens are saved, the input is highlighted the same time it - # is scanned) with the same code. - # - # See CodeRay.encode_stream and CodeRay.scan_stream - class TokenStream < Tokens - - # Whether the object is a TokenStream. - # - # Returns true. - def stream? - true - end - - # The Array is empty, but size counts the tokens given by <<. - attr_reader :size - - # Creates a new TokenStream that calls +block+ whenever - # its << method is called. - # - # Example: - # - # require 'coderay' - # - # token_stream = CodeRay::TokenStream.new do |kind, text| - # puts 'kind: %s, text size: %d.' % [kind, text.size] - # end - # - # token_stream << [:regexp, '/\d+/'] - # #-> kind: rexpexp, text size: 5. - # - def initialize &block - raise ArgumentError, 'Block expected for streaming.' unless block - @callback = block - @size = 0 - end - - # Calls +block+ with +token+ and increments size. - # - # Returns self. - def << token - @callback.call token - @size += 1 - self - end - - # This method is not implemented due to speed reasons. Use Tokens. - def text_size - raise NotImplementedError, 'This method is not implemented due to speed reasons.' - end - - # A TokenStream cannot be dumped. Use Tokens. - def dump - raise NotImplementedError, 'A TokenStream cannot be dumped.' - end - - # A TokenStream cannot be optimized. Use Tokens. - def optimize - raise NotImplementedError, 'A TokenStream cannot be optimized.' - end - - end - -end +module CodeRay + + # = Tokens + # + # The Tokens class represents a list of tokens returnd from + # a Scanner. + # + # A token is not a special object, just a two-element Array + # consisting of + # * the _token_ _kind_ (a Symbol representing the type of the token) + # * the _token_ _text_ (the original source of the token in a String) + # + # A token looks like this: + # + # [:comment, '# It looks like this'] + # [:float, '3.1415926'] + # [:error, 'äöü'] + # + # Some scanners also yield some kind of sub-tokens, represented by special + # token texts, namely :open and :close . + # + # The Ruby scanner, for example, splits "a string" into: + # + # [ + # [:open, :string], + # [:delimiter, '"'], + # [:content, 'a string'], + # [:delimiter, '"'], + # [:close, :string] + # ] + # + # Tokens is also the interface between Scanners and Encoders: + # The input is split and saved into a Tokens object. The Encoder + # then builds the output from this object. + # + # Thus, the syntax below becomes clear: + # + # CodeRay.scan('price = 2.59', :ruby).html + # # the Tokens object is here -------^ + # + # See how small it is? ;) + # + # Tokens gives you the power to handle pre-scanned code very easily: + # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string + # that you put in your DB. + # + # Tokens' subclass TokenStream allows streaming to save memory. + class Tokens < Array + + class << self + + # Convert the token to a string. + # + # This format is used by Encoders.Tokens. + # It can be reverted using read_token. + def write_token text, type + if text.is_a? String + "#{type}\t#{escape(text)}\n" + else + ":#{text}\t#{type}\t\n" + end + end + + # Read a token from the string. + # + # Inversion of write_token. + # + # TODO Test this! + def read_token token + type, text = token.split("\t", 2) + if type[0] == ?: + [text.to_sym, type[1..-1].to_sym] + else + [type.to_sym, unescape(text)] + end + end + + # Escapes a string for use in write_token. + def escape text + text.gsub(/[\n\\]/, '\\\\\&') + end + + # Unescapes a string created by escape. + def unescape text + text.gsub(/\\[\n\\]/) { |m| m[1,1] } + end + + end + + # Whether the object is a TokenStream. + # + # Returns false. + def stream? + false + end + + # Iterates over all tokens. + # + # If a filter is given, only tokens of that kind are yielded. + def each kind_filter = nil, &block + unless kind_filter + super(&block) + else + super() do |text, kind| + next unless kind == kind_filter + yield text, kind + end + end + end + + # Iterates over all text tokens. + # Range tokens like [:open, :string] are left out. + # + # Example: + # tokens.each_text_token { |text, kind| text.replace html_escape(text) } + def each_text_token + each do |text, kind| + next unless text.respond_to? :to_str + yield text, kind + end + end + + # Encode the tokens using encoder. + # + # encoder can be + # * a symbol like :html oder :statistic + # * an Encoder class + # * an Encoder object + # + # options are passed to the encoder. + def encode encoder, options = {} + unless encoder.is_a? Encoders::Encoder + unless encoder.is_a? Class + encoder_class = Encoders[encoder] + end + encoder = encoder_class.new options + end + encoder.encode_tokens self, options + end + + + # Turn into a string using Encoders::Text. + # + # +options+ are passed to the encoder if given. + def to_s options = {} + encode :text, options + end + + + # Redirects unknown methods to encoder calls. + # + # For example, if you call +tokens.html+, the HTML encoder + # is used to highlight the tokens. + def method_missing meth, options = {} + Encoders[meth].new(options).encode_tokens self + end + + # Returns the tokens compressed by joining consecutive + # tokens of the same kind. + # + # This can not be undone, but should yield the same output + # in most Encoders. It basically makes the output smaller. + # + # Combined with dump, it saves space for the cost of time. + # + # If the scanner is written carefully, this is not required - + # for example, consecutive //-comment lines could already be + # joined in one comment token by the Scanner. + def optimize + print ' Tokens#optimize: before: %d - ' % size if $DEBUG + last_kind = last_text = nil + new = self.class.new + each do |text, kind| + if text.is_a? String + if kind == last_kind + last_text << text + else + new << [last_text, last_kind] if last_kind + last_text = text + last_kind = kind + end + else + new << [last_text, last_kind] if last_kind + last_kind = last_text = nil + new << [text, kind] + end + end + new << [last_text, last_kind] if last_kind + print 'after: %d (%d saved = %2.0f%%)' % + [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG + new + end + + # Compact the object itself; see optimize. + def optimize! + replace optimize + end + + # Dumps the object into a String that can be saved + # in files or databases. + # + # The dump is created with Marshal.dump; + # In addition, it is gzipped using GZip.gzip. + # + # The returned String object includes Undumping + # so it has an #undump method. See Tokens.load. + # + # You can configure the level of compression, + # but the default value 7 should be what you want + # in most cases as it is a good comprimise between + # speed and compression rate. + # + # See GZip module. + def dump gzip_level = 7 + require 'coderay/helpers/gzip_simple' + dump = Marshal.dump self + dump = dump.gzip gzip_level + dump.extend Undumping + end + + # The total size of the tokens. + # Should be equal to the input size before + # scanning. + def text_size + map { |t, k| t }.join.size + end + + # Include this module to give an object an #undump + # method. + # + # The string returned by Tokens.dump includes Undumping. + module Undumping + # Calls Tokens.load with itself. + def undump + Tokens.load self + end + end + + # Undump the object using Marshal.load, then + # unzip it using GZip.gunzip. + # + # The result is commonly a Tokens object, but + # this is not guaranteed. + def Tokens.load dump + require 'coderay/helpers/gzip_simple' + dump = dump.gunzip + @dump = Marshal.load dump + end + + end + + + # = TokenStream + # + # The TokenStream class is a fake Array without elements. + # + # It redirects the method << to a block given at creation. + # + # This allows scanners and Encoders to use streaming (no + # tokens are saved, the input is highlighted the same time it + # is scanned) with the same code. + # + # See CodeRay.encode_stream and CodeRay.scan_stream + class TokenStream < Tokens + + # Whether the object is a TokenStream. + # + # Returns true. + def stream? + true + end + + # The Array is empty, but size counts the tokens given by <<. + attr_reader :size + + # Creates a new TokenStream that calls +block+ whenever + # its << method is called. + # + # Example: + # + # require 'coderay' + # + # token_stream = CodeRay::TokenStream.new do |kind, text| + # puts 'kind: %s, text size: %d.' % [kind, text.size] + # end + # + # token_stream << [:regexp, '/\d+/'] + # #-> kind: rexpexp, text size: 5. + # + def initialize &block + raise ArgumentError, 'Block expected for streaming.' unless block + @callback = block + @size = 0 + end + + # Calls +block+ with +token+ and increments size. + # + # Returns self. + def << token + @callback.call token + @size += 1 + self + end + + # This method is not implemented due to speed reasons. Use Tokens. + def text_size + raise NotImplementedError, 'This method is not implemented due to speed reasons.' + end + + # A TokenStream cannot be dumped. Use Tokens. + def dump + raise NotImplementedError, 'A TokenStream cannot be dumped.' + end + + # A TokenStream cannot be optimized. Use Tokens. + def optimize + raise NotImplementedError, 'A TokenStream cannot be optimized.' + end + + end + +end diff --git a/rake_tasks/test.rake b/rake_tasks/test.rake index 0808edb..c29c263 100644 --- a/rake_tasks/test.rake +++ b/rake_tasks/test.rake @@ -6,7 +6,7 @@ namespace :test do desc 'Test CodeRay' task :scanners do - system 'ruby -w ./test/suite.rb' + system 'ruby -wd ./test/suite.rb' end end -- cgit v1.2.1