From 48e144a20829faaeca9a7db8fbc6128f1f5d7297 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 4 Oct 2005 04:04:07 +0000 Subject: Two new encoders: debug and xml. encoder.rb: new token handling encoders/statistic.rb: using new handling ruby_helper.rb: small improvements ruby.rb: - escapes in subtoken - Float detection changed - some multi-char operators are now scanned as one token - def and module definition handling changed bin/coderay: improved, new interface (still in progress) plugin.rb: more expressive load error message --- lib/coderay/encoder.rb | 25 +++++++++-- lib/coderay/encoders/debug.rb | 38 ++++++++++++++++ lib/coderay/encoders/statistic.rb | 25 ++++++----- lib/coderay/encoders/xml.rb | 70 +++++++++++++++++++++++++++++ lib/coderay/helpers/plugin.rb | 4 +- lib/coderay/scanners/helpers/ruby_helper.rb | 18 ++++---- lib/coderay/scanners/ruby.rb | 31 +++++++------ 7 files changed, 172 insertions(+), 39 deletions(-) create mode 100644 lib/coderay/encoders/debug.rb create mode 100644 lib/coderay/encoders/xml.rb (limited to 'lib/coderay') diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb index 74e1582..448a45b 100644 --- a/lib/coderay/encoder.rb +++ b/lib/coderay/encoder.rb @@ -122,11 +122,28 @@ module CodeRay # Called with +text+ and +kind+ of the currently scanned token. # For simple scanners, it's enougth to implement this method. # - # Raises a NotImplementedError exception if it is not overwritten - # in subclass. + # By default, it calls text_token or block_token, depending on + # whether +text+ is a String. def token text, kind - raise NotImplementedError, - "#{self.class}#token not implemented." + if text.is_a? String + text_token text, kind + else + block_token text, kind + end + end + + def text_token text, kind + end + + def block_token action, kind + case action + when :open + open_token kind + when :close + close_token kind + else + raise 'unknown block action: %p' % action + end end # Called with merged options after encoding starts. diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb new file mode 100644 index 0000000..b084733 --- /dev/null +++ b/lib/coderay/encoders/debug.rb @@ -0,0 +1,38 @@ +module CodeRay + module Encoders + + # = Debug Encoder + class Debug < Encoder + + include Streamable + register_for :debug + + FILE_EXTENSION = 'debug' + + protected + def text_token text, kind + @out << + if kind == :space + text + else + text = text.gsub(/[)\\]/, '\\\\\0') + "#{kind}(#{text})" + end + end + + def block_token action, kind + @out << super + end + + def open_token kind + "#{kind}<" + end + + def close_token kind + ">" + end + + end + + end +end diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index 0685c03..cd26272 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -22,17 +22,22 @@ module CodeRay module Encoders super end - def token text, type + def text_token text, kind + @real_token_count += 1 unless kind == :space + @type_stats[kind].count += 1 + @type_stats[kind].size += text.size + @type_stats['TOTAL'].size += text.size + end + + # TODO Hierarchy handling + def block_token action, kind + #@content_type = kind + @type_stats['open/close'].count += 1 + end + + def token text, kind + super @type_stats['TOTAL'].count += 1 - if text.is_a? String - @real_token_count += 1 unless type == :space - @type_stats[type].count += 1 - @type_stats[type].size += text.size - @type_stats['TOTAL'].size += text.size - else - @content_type = type - @type_stats['open/close'].count += 1 - end end STATS = <<-STATS diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb new file mode 100644 index 0000000..5596f46 --- /dev/null +++ b/lib/coderay/encoders/xml.rb @@ -0,0 +1,70 @@ +module CodeRay + module Encoders + + # = Debug Encoder + class XML < Encoder + + include Streamable + register_for :xml + + FILE_EXTENSION = 'xml' + + require 'rexml/document' + + DEFAULT_OPTIONS = { + :tab_width => 8, + :pretty => -1, + :transitive => false, + } + + protected + + def setup options + @out = '' + @doc = REXML::Document.new + @doc << REXML::XMLDecl.new + @tab_width = options[:tab_width] + @root = @node = @doc.add_element('coderay-tokens') + end + + def finish options + @doc.write @out, options[:pretty], options[:transitive], true + @out + end + + def text_token text, kind + if kind == :space + token = @node + else + token = @node.add_element kind.to_s + end + text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| + case + when space + token << REXML::Text.new(space, true) + when tab + token << REXML::Text.new(tab, true) + when nl + token << REXML::Text.new(nl, true) + else + token << REXML::Text.new($&) + end + end + end + + def open_token kind + @node = @node.add_element kind.to_s + end + + def close_token kind + if @node == @root + raise 'no token to close!' + end + @node = @node.parent + end + + end + + end +end + diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index aacde99..2518e5a 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -92,8 +92,8 @@ module PluginHost begin $stderr.puts 'Loading plugin: ' + path if $DEBUG require path - rescue LoadError - raise PluginNotFound, "Plugin #{id.inspect} not found." + rescue LoadError => boom + raise PluginNotFound, 'Could not load plugin %p: %s' % [id, boom] else # Plugin should have registered by now unless h.has_key? id diff --git a/lib/coderay/scanners/helpers/ruby_helper.rb b/lib/coderay/scanners/helpers/ruby_helper.rb index 241b392..a44ca79 100644 --- a/lib/coderay/scanners/helpers/ruby_helper.rb +++ b/lib/coderay/scanners/helpers/ruby_helper.rb @@ -60,19 +60,17 @@ module CodeRay module Scanners QUOTE_TO_TYPE.default = :string REGEXP_MODIFIERS = /[mixounse]*/ - REGEXP_SYMBOLS = / - [|?*+?(){}\[\].^$] - /x + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ - DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error + DECIMAL = /\d+(?:_\d+)*/ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ BINARY = /0b[01]+(?:_[01]+)*/ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox - FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox - FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox - NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | #{FLOAT_OR_INT} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} /ox SYMBOL = / : @@ -103,7 +101,7 @@ module CodeRay module Scanners ) /mx - # NOTE: This is not completel correct, but + # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / << (-)? # $1 = float @@ -115,7 +113,7 @@ module CodeRay module Scanners ) /mx - RDOC = / + RUBYDOC = / =begin (?!\S) .*? (?: \Z | ^=end (?!\S) [^\n]* ) @@ -127,6 +125,8 @@ module CodeRay module Scanners (?: \Z | (?=^\#CODE) ) /mx + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 72e59bd..a50893a 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -128,13 +128,14 @@ module CodeRay module Scanners fancy_allowed = regexp_allowed = true state = :initial depth = 1 - tokens << [match + getch, :escape] + tokens << [:open, :escape] + tokens << [match + getch, :delimiter] when ?$, ?@ tokens << [match, :escape] last_state = state # scan one token as normal code, then return here state = :initial else - raise "else-case # reached; #%p not handled" % peek(1), tokens + raise 'else-case # reached; #%p not handled' % peek(1), tokens end when state.paren @@ -145,7 +146,7 @@ module CodeRay module Scanners tokens << [match, :function] else - raise "else-case \" reached; %p not handled, state = %p" % [match, state], tokens + raise 'else-case " reached; %p not handled, state = %p' % [match, state], tokens end next @@ -153,7 +154,7 @@ module CodeRay module Scanners else # {{{ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or - ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) ) + ( bol? and match = scan(/#{RUBYDOC_OR_DATA}/o) ) fancy_allowed = true case m = match[0] when ?\s, ?\t, ?\f @@ -175,7 +176,7 @@ module CodeRay module Scanners type = :comment regexp_allowed = true else - raise "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens + raise 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens end tokens << [match, type] next @@ -195,7 +196,9 @@ module CodeRay module Scanners depth -= 1 if depth == 0 state, depth, heredocs = states.pop + tokens << [match + getch, :delimiter] type = :escape + match = :close end end end @@ -221,7 +224,7 @@ module CodeRay module Scanners elsif match = scan(/#{INSTANCE_VARIABLE}/o) type = :instance_variable - elsif regexp_allowed and match = scan(/ \/ /mx) + elsif regexp_allowed and match = scan(/\//) tokens << [:open, :regexp] type = :delimiter interpreted = true @@ -232,7 +235,7 @@ module CodeRay module Scanners end elsif match = scan(/#{NUMERIC}/o) - type = if match[/#{FLOAT}/o] then :float else :integer end + type = if self[1] then :float else :integer end elsif fancy_allowed and match = scan(/#{SYMBOL}/o) case match[1] @@ -265,7 +268,7 @@ module CodeRay module Scanners elsif fancy_allowed and match = scan(/#{CHARACTER}/o) type = :integer - elsif match = scan(/ [\/%?)? | [?:] /x) regexp_allowed = fancy_allowed = :set type = :operator @@ -290,25 +293,25 @@ module CodeRay module Scanners end elsif state == :def_expected - if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox) + state = :initial + if match = scan(/#{METHOD_NAME_EX}/o) type = :method else - match = getch + next end - state = :initial elsif state == :module_expected if match = scan(/<