diff options
Diffstat (limited to 'lib/coderay')
36 files changed, 1460 insertions, 1630 deletions
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb index 3ae2924..82545c4 100644 --- a/lib/coderay/encoder.rb +++ b/lib/coderay/encoder.rb @@ -31,11 +31,6 @@ module CodeRay class << self - # Returns if the Encoder can be used in streaming mode. - def streamable? - is_a? Streamable - end - # If FILE_EXTENSION isn't defined, this method returns the # downcase class name instead. def const_missing sym @@ -69,6 +64,7 @@ module CodeRay @options = self.class::DEFAULT_OPTIONS.merge options raise "I am only the basic Encoder class. I can't encode "\ "anything. :( Use my subclasses." if self.class == Encoder + $ALREADY_WARNED_OLD_INTERFACE = false end # Encode a Tokens object. @@ -95,24 +91,25 @@ module CodeRay # Encode the given +code+ using the Scanner for +lang+ in # streaming mode. def encode_stream code, lang, options = {} - raise NotStreamableError, self unless kind_of? Streamable options = @options.merge options setup options scanner_options = CodeRay.get_scanner_options options + scanner_options[:tokens] = self @token_stream = - CodeRay.scan_stream code, lang, scanner_options, &self + CodeRay.scan_stream code, lang, scanner_options finish options end - # Behave like a proc. The token method is converted to a proc. - def to_proc - method(:token).to_proc - end - # Return the default file extension for outputs of this encoder. def file_extension self.class::FILE_EXTENSION end + + def << token + warn 'Using old Tokens#<< interface.' unless $ALREADY_WARNED_OLD_INTERFACE + $ALREADY_WARNED_OLD_INTERFACE = true + self.token(*token) + end protected @@ -123,90 +120,80 @@ module CodeRay def setup options @out = '' end - + + public + # Called with +content+ and +kind+ of the currently scanned token. # For simple scanners, it's enougth to implement this method. # - # By default, it calls text_token or block_token, depending on - # whether +content+ is a String. + # By default, it calls text_token, begin_group, end_group, begin_line, + # or end_line, depending on the +content+. def token content, kind - encoded_token = - if content.is_a? ::String - text_token content, kind - elsif content.is_a? ::Symbol - block_token content, kind - else - raise 'Unknown token content type: %p' % [content] - end - append_encoded_token_to_output encoded_token - end - - def append_encoded_token_to_output encoded_token - @out << encoded_token if encoded_token && defined?(@out) && @out - end - - # Called for each text token ([text, kind]), where text is a String. - def text_token text, kind - end - - # Called for each block (non-text) token ([action, kind]), - # where +action+ is a Symbol. - # - # Calls open_token, close_token, begin_line, and end_line according to - # the value of +action+. - def block_token action, kind - case action - when :open - open_token kind - when :close - close_token kind + case content + when String + text_token content, kind + when :begin_group + begin_group kind + when :end_group + end_group kind when :begin_line begin_line kind when :end_line end_line kind else - raise 'unknown block action: %p' % action + raise 'Unknown token content type: %p' % [content] end end - # Called for each block token at the start of the block ([:open, kind]). - def open_token kind + # Called for each text token ([text, kind]), where text is a String. + def text_token text, kind end - # Called for each block token end of the block ([:close, kind]). - def close_token kind + # Starts a token group with the given +kind+. + def begin_group kind end - # Called for each line token block at the start of the line ([:begin_line, kind]). + # Ends a token group with the given +kind+. + def end_group kind + end + + # Starts a new line token group with the given +kind+. def begin_line kind end - # Called for each line token block at the end of the line ([:end_line, kind]). + # Ends a new line token group with the given +kind+. def end_line kind end - + + protected + # Called with merged options after encoding starts. # The return value is the result of encoding, typically @out. def finish options @out end - + # Do the encoding. # - # The already created +tokens+ object must be used; it can be a - # TokenStream or a Tokens object. - if RUBY_VERSION >= '1.9' - def compile tokens, options - for text, kind in tokens - token text, kind + # The already created +tokens+ object must be used; it must be a + # Tokens object. + def compile tokens, options = {} + content = nil + for item in tokens + if item.is_a? Array + warn 'two-element array tokens are deprecated' + content, item = *item + end + if content + token content, item + content = nil + else + content = item end end - else - def compile tokens, options - tokens.each(&self) - end + raise if content end - + end end diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb index 2e60a89..451a7f8 100644 --- a/lib/coderay/encoders/count.rb +++ b/lib/coderay/encoders/count.rb @@ -1,25 +1,55 @@ +($:.unshift '../..'; require 'coderay') unless defined? CodeRay module CodeRay module Encoders # Returns the number of tokens. # - # Text and block tokens (:open etc.) are counted. + # Text and block tokens are counted. class Count < Encoder - + include Streamable register_for :count - + protected - + def setup options @out = 0 end - - def token text, kind + + def text_token text, kind + @out += 1 + end + + def begin_group kind @out += 1 end + alias end_group begin_group + alias begin_line begin_group + alias end_line begin_group end - + end end + +if $0 == __FILE__ + $VERBOSE = true + $: << File.join(File.dirname(__FILE__), '..') + eval DATA.read, nil, $0, __LINE__ + 4 +end + +__END__ +require 'test/unit' + +class CountTest < Test::Unit::TestCase + + def test_count + tokens = CodeRay.scan <<-RUBY.strip, :ruby +#!/usr/bin/env ruby +# a minimal Ruby program +puts "Hello world!" + RUBY + assert_equal 9, tokens.encode_with(:count) + end + +end
\ No newline at end of file diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb index 4c680d3..89e430f 100644 --- a/lib/coderay/encoders/debug.rb +++ b/lib/coderay/encoders/debug.rb @@ -19,31 +19,43 @@ module Encoders register_for :debug FILE_EXTENSION = 'raydebug' + + def initialize options = {} + super + @opened = [] + end - protected + public + def text_token text, kind if kind == :space - text + @out << text else text = text.gsub(/[)\\]/, '\\\\\0') # escape ) and \ - "#{kind}(#{text})" + @out << kind.to_s << '(' << text << ')' end end - def open_token kind - "#{kind}<" + def begin_group kind + @opened << kind + @out << kind.to_s << '<' end - def close_token kind - '>' + def end_group kind + if @opened.last != kind + puts @out + raise "we are inside #{@opened.inspect}, not #{kind}" + end + @opened.pop + @out << '>' end def begin_line kind - "#{kind}[" + @out << kind.to_s << '[' end def end_line kind - ']' + @out << ']' end end @@ -74,16 +86,16 @@ class DebugEncoderTest < Test::Unit::TestCase TEST_INPUT = CodeRay::Tokens[ ['10', :integer], ['(\\)', :operator], - [:open, :string], + [:begin_group, :string], ['test', :content], - [:close, :string], + [:end_group, :string], [:begin_line, :test], ["\n", :space], ["\n \t", :space], [" \n", :space], ["[]", :method], [:end_line, :test], - ] + ].flatten TEST_OUTPUT = <<-'DEBUG'.chomp integer(10)operator((\\\))string<content(test)>test[ diff --git a/lib/coderay/encoders/filter.rb b/lib/coderay/encoders/filter.rb index c1991cf..6b78ad3 100644 --- a/lib/coderay/encoders/filter.rb +++ b/lib/coderay/encoders/filter.rb @@ -16,15 +16,27 @@ module Encoders end def text_token text, kind - [text, kind] if include_text_token? text, kind + @out.text_token text, kind if include_text_token? text, kind end def include_text_token? text, kind true end - def block_token action, kind - [action, kind] if include_block_token? action, kind + def begin_group kind + @out.begin_group kind if include_block_token? :begin_group, kind + end + + def end_group kind + @out.end_group kind if include_block_token? :end_group, kind + end + + def begin_line kind + @out.begin_line kind if include_block_token? :begin_line, kind + end + + def end_line kind + @out.end_line kind if include_block_token? :end_line, kind end def include_block_token? action, kind @@ -59,7 +71,7 @@ class FilterTest < Test::Unit::TestCase def test_filtering_text_tokens tokens = CodeRay::Tokens.new 10.times do |i| - tokens << [i.to_s, :index] + tokens.text_token i.to_s, :index end assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens) assert_equal tokens, tokens.filter @@ -68,9 +80,9 @@ class FilterTest < Test::Unit::TestCase def test_filtering_block_tokens tokens = CodeRay::Tokens.new 10.times do |i| - tokens << [:open, :index] - tokens << [i.to_s, :content] - tokens << [:close, :index] + tokens.begin_group :index + tokens.text_token i.to_s, :content + tokens.end_group :index end assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens) assert_equal tokens, tokens.filter diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index dcdffa1..807fb42 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -83,7 +83,7 @@ module Encoders # # === :hint # Include some information into the output using the title attribute. - # Can be :info (show token type on mouse-over), :info_long (with full path) + # Can be :info (show token kind on mouse-over), :info_long (with full path) # or :debug (via inspect). # # Default: false @@ -153,12 +153,18 @@ module Encoders # # +hint+ may be :info, :info_long or :debug. def self.token_path_to_hint hint, kinds + # FIXME: TRANSPARENT_TOKEN_KINDS? + # if TRANSPARENT_TOKEN_KINDS.include? kinds.first + # kinds = kinds[1..-1] + # else + # kinds = kinds[1..-1] + kinds.first + # end title = case hint when :info TOKEN_KIND_TO_INFO[kinds.first] when :info_long - kinds.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') + kinds.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') when :debug kinds.inspect end @@ -167,13 +173,13 @@ module Encoders def setup options super - + @HTML_ESCAPE = HTML_ESCAPE.dup @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] - + @opened = [nil] @css = CSS.new options[:style] - + hint = options[:hint] if hint and not [:debug, :info, :info_long].include? hint raise ArgumentError, "Unknown value %p for :hint; \ @@ -184,45 +190,33 @@ module Encoders when :class @css_style = Hash.new do |h, k| - c = CodeRay::Tokens::AbbreviationForKind[k.first] - if c == :NO_HIGHLIGHT and not hint - h[k.dup] = false - else - title = if hint - HTML.token_path_to_hint(hint, k[1..-1] << k.first) - else - '' - end - if c == :NO_HIGHLIGHT - h[k.dup] = '<span%s>' % [title] - else - h[k.dup] = '<span%s class="%s">' % [title, c] + c = Tokens::AbbreviationForKind[k.first] + h[k.dup] = + if c != :NO_HIGHLIGHT or hint + if hint + title = HTML.token_path_to_hint hint, k + end + if c == :NO_HIGHLIGHT + '<span%s>' % [title] + else + '<span%s class="%s">' % [title, c] + end end - end end when :style @css_style = Hash.new do |h, k| - if k.is_a? ::Array - styles = k.dup - else - styles = [k] - end - type = styles.first - classes = styles.map { |c| Tokens::AbbreviationForKind[c] } - if classes.first == :NO_HIGHLIGHT and not hint - h[k] = false - else - styles.shift if TRANSPARENT_TOKEN_KINDS.include? styles.first - title = HTML.token_path_to_hint hint, styles - style = @css[*classes] - h[k] = + classes = k.map { |c| Tokens::AbbreviationForKind[c] } + h[k.dup] = + if classes.first != :NO_HIGHLIGHT or hint + if hint + title = HTML.token_path_to_hint hint, k + end + style = @css[*classes] if style '<span%s style="%s">' % [title, style] - else - false end - end + end end else @@ -233,80 +227,81 @@ module Encoders def finish options not_needed = @opened.shift - @out << '</span>' * @opened.size unless @opened.empty? warn '%d tokens still open: %p' % [@opened.size, @opened] + @out << '</span>' * @opened.size end - + @out.extend Output @out.css = @css @out.numerize! options[:line_numbers], options @out.wrap! options[:wrap] @out.apply_title! options[:title] - + super end - - def token text, type - case text - - when nil - # raise 'Token with nil as text was given: %p' % [[text, type]] - - when String - if text =~ /#{HTML_ESCAPE_PATTERN}/o - text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } - end - @opened[0] = type - if text != "\n" && style = @css_style[@opened] - @out << style << text << '</span>' + + public + + def text_token text, kind + if text =~ /#{HTML_ESCAPE_PATTERN}/o + text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } + end + @opened[0] = kind + @out << + if style = @css_style[@opened] + style + text + '</span>' else - @out << text - end - - - # token groups, eg. strings - when :open - @opened[0] = type - @out << (@css_style[@opened] || '<span>') - @opened << type - when :close - if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type) - warn 'Malformed token stream: Trying to close a token (%p) ' \ - 'that is not open. Open are: %p.' % [type, @opened[1..-1]] + text end + end + + # token groups, eg. strings + def begin_group kind + @opened[0] = kind + @opened << kind + @out << (@css_style[@opened] || '<span>') + end + + def end_group kind + if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != kind) + warn 'Malformed token stream: Trying to close a token (%p) ' \ + 'that is not open. Open are: %p.' % [kind, @opened[1..-1]] + end + @out << if @opened.empty? - # nothing to close + '' # nothing to close else - @out << '</span>' @opened.pop + '</span>' end - - # whole lines to be highlighted, eg. a deleted line in a diff - when :begin_line - @opened[0] = type - if style = @css_style[@opened] - @out << style.sub('<span', '<div') + end + + # whole lines to be highlighted, eg. a deleted line in a diff + def begin_line kind + @opened[0] = kind + style = @css_style[@opened] + @opened << kind + @out << + if style + style.sub '<span', '<div' else - @out << '<div>' - end - @opened << type - when :end_line - if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type) - warn 'Malformed token stream: Trying to close a line (%p) ' \ - 'that is not open. Open are: %p.' % [type, @opened[1..-1]] + '<div>' end + end + + def end_line kind + if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != kind) + warn 'Malformed token stream: Trying to close a line (%p) ' \ + 'that is not open. Open are: %p.' % [kind, @opened[1..-1]] + end + @out << if @opened.empty? - # nothing to close + '' # nothing to close else - @out << '</div>' @opened.pop + '</div>' end - - else - raise 'unknown token kind: %p' % [text] - - end end end diff --git a/lib/coderay/encoders/json.rb b/lib/coderay/encoders/json.rb index 78f0ec0..bb09809 100644 --- a/lib/coderay/encoders/json.rb +++ b/lib/coderay/encoders/json.rb @@ -33,11 +33,23 @@ module Encoders end def text_token text, kind - { :type => 'text', :text => text, :kind => kind } + @out << { :type => 'text', :text => text, :kind => kind } end - def block_token action, kind - { :type => 'block', :action => action, :kind => kind } + def begin_group kind + @out << { :type => 'block', :action => 'open', :kind => kind } + end + + def end_group kind + @out << { :type => 'block', :action => 'close', :kind => kind } + end + + def begin_line kind + @out << { :type => 'block', :action => 'begin_line', :kind => kind } + end + + def end_line kind + @out << { :type => 'block', :action => 'end_line', :kind => kind } end def finish options diff --git a/lib/coderay/encoders/lines_of_code.rb b/lib/coderay/encoders/lines_of_code.rb index c6ed4de..6b36aef 100644 --- a/lib/coderay/encoders/lines_of_code.rb +++ b/lib/coderay/encoders/lines_of_code.rb @@ -79,9 +79,9 @@ puts "Hello world!" def test_filtering_block_tokens tokens = CodeRay::Tokens.new - tokens << ["Hello\n", :world] - tokens << ["Hello\n", :space] - tokens << ["Hello\n", :comment] + tokens.concat ["Hello\n", :world] + tokens.concat ["Hello\n", :space] + tokens.concat ["Hello\n", :comment] assert_equal 2, CodeRay::Encoders::LinesOfCode.new.encode_tokens(tokens) assert_equal 2, tokens.lines_of_code assert_equal 2, tokens.loc diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index 1b38938..d267b21 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -1,3 +1,4 @@ +($:.unshift '../..'; require 'coderay') unless defined? CodeRay module CodeRay module Encoders @@ -34,9 +35,25 @@ module Encoders end # TODO Hierarchy handling - def block_token action, kind + def begin_group kind + block_token 'begin_group' + end + + def end_group kind + block_token 'end_group' + end + + def begin_line kind + block_token 'begin_line' + end + + def end_line kind + block_token 'end_line' + end + + def block_token action @type_stats['TOTAL'].count += 1 - @type_stats['open/close'].count += 1 + @type_stats[action].count += 1 end STATS = <<-STATS # :nodoc: @@ -77,3 +94,67 @@ Token Types (%d): end end + +if $0 == __FILE__ + $VERBOSE = true + $: << File.join(File.dirname(__FILE__), '..') + eval DATA.read, nil, $0, __LINE__ + 4 +end + +__END__ +require 'test/unit' + +class StatisticEncoderTest < Test::Unit::TestCase + + def test_creation + assert CodeRay::Encoders::Statistic < CodeRay::Encoders::Encoder + stats = nil + assert_nothing_raised do + stats = CodeRay.encoder :statistic + end + assert_kind_of CodeRay::Encoders::Encoder, stats + end + + TEST_INPUT = CodeRay::Tokens[ + ['10', :integer], + ['(\\)', :operator], + [:begin_group, :string], + ['test', :content], + [:end_group, :string], + [:begin_line, :test], + ["\n", :space], + ["\n \t", :space], + [" \n", :space], + ["[]", :method], + [:end_line, :test], + ].flatten + TEST_OUTPUT = <<-'DEBUG' + +Code Statistics + +Tokens 11 + Non-Whitespace 4 +Bytes Total 20 + +Token Types (5): + type count ratio size (average) +------------------------------------------------------------- + TOTAL 11 100.00 % 1.8 + space 3 27.27 % 3.0 + begin_group 1 9.09 % 0.0 + begin_line 1 9.09 % 0.0 + content 1 9.09 % 4.0 + end_group 1 9.09 % 0.0 + end_line 1 9.09 % 0.0 + integer 1 9.09 % 2.0 + method 1 9.09 % 2.0 + operator 1 9.09 % 3.0 + + DEBUG + + def test_filtering_text_tokens + assert_equal TEST_OUTPUT, CodeRay::Encoders::Statistic.new.encode_tokens(TEST_INPUT) + assert_equal TEST_OUTPUT, TEST_INPUT.statistic + end + +end
\ No newline at end of file diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb index 7224218..3a774a0 100644 --- a/lib/coderay/encoders/terminal.rb +++ b/lib/coderay/encoders/terminal.rb @@ -92,41 +92,72 @@ module CodeRay TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved] TOKEN_COLORS[:method] = TOKEN_COLORS[:function] TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex] - TOKEN_COLORS[:open] = TOKEN_COLORS[:close] = TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter] + TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] = + TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] = + TOKEN_COLORS[:delimiter] protected def setup(options) super @opened = [] + @subcolors = nil end - - def finish(options) - super - end - - def text_token text, type - if color = (@subcolors || TOKEN_COLORS)[type] + + public + + def text_token text, kind + if color = (@subcolors || TOKEN_COLORS)[kind] if Hash === color if color[:self] color = color[:self] else - return text + @out << text + return end end - - out = ansi_colorize(color) - out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color)) - out << ansi_clear - out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self] - out + + @out << ansi_colorize(color) + @out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color)) + @out << ansi_clear + @out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self] else - text + @out << text end end - def open_token type - if color = TOKEN_COLORS[type] + def begin_group kind + @opened << kind + @out << open_token(kind) + end + alias begin_line begin_group + + def end_group kind + if @opened.empty? + # nothing to close + else + @opened.pop + @out << ansi_clear + @out << open_token(@opened.last) + end + end + + def end_line kind + if @opened.empty? + # nothing to close + else + @opened.pop + # whole lines to be highlighted, + # eg. added/modified/deleted lines in a diff + @out << "\t" * 100 + ansi_clear + @out << open_token(@opened.last) + end + end + + private + + def open_token kind + if color = TOKEN_COLORS[kind] if Hash === color @subcolors = color ansi_colorize(color[:self]) if color[:self] @@ -140,34 +171,6 @@ module CodeRay end end - def block_token action, type - case action - - when :open, :begin_line - @opened << type - open_token type - when :close, :end_line - if @opened.empty? - # nothing to close - else - @opened.pop - if action == :end_line - # whole lines to be highlighted, - # eg. added/modified/deleted lines in a diff - "\t" * 100 + ansi_clear - else - ansi_clear - end + - open_token(@opened.last) - end - - else - raise 'unknown token kind: %p' % [text] - end - end - - private - def ansi_colorize(color) Array(color).map { |c| "\e[#{c}m" }.join end diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb index 26fef84..ecbf624 100644 --- a/lib/coderay/encoders/text.rb +++ b/lib/coderay/encoders/text.rb @@ -23,16 +23,16 @@ module Encoders :separator => '' } + def text_token text, kind + @out << text + @sep + end + protected def setup options super @sep = options[:separator] end - def text_token text, kind - text + @sep - end - def finish options super.chomp @sep end diff --git a/lib/coderay/encoders/token_kind_filter.rb b/lib/coderay/encoders/token_kind_filter.rb index 4b2f582..fd3df44 100644 --- a/lib/coderay/encoders/token_kind_filter.rb +++ b/lib/coderay/encoders/token_kind_filter.rb @@ -76,28 +76,28 @@ class TokenKindFilterTest < Test::Unit::TestCase def test_filtering_text_tokens tokens = CodeRay::Tokens.new for i in 1..10 - tokens << [i.to_s, :index] - tokens << [' ', :space] if i < 10 + tokens.text_token i.to_s, :index + tokens.text_token ' ', :space if i < 10 end - assert_equal 10, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :space).size - assert_equal 10, tokens.token_kind_filter(:exclude => :space).size - assert_equal 9, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :space).size - assert_equal 9, tokens.token_kind_filter(:include => :space).size - assert_equal 0, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :all).size - assert_equal 0, tokens.token_kind_filter(:exclude => :all).size + assert_equal 10, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :space).count + assert_equal 10, tokens.token_kind_filter(:exclude => :space).count + assert_equal 9, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :space).count + assert_equal 9, tokens.token_kind_filter(:include => :space).count + assert_equal 0, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :all).count + assert_equal 0, tokens.token_kind_filter(:exclude => :all).count end def test_filtering_block_tokens tokens = CodeRay::Tokens.new 10.times do |i| - tokens << [:open, :index] - tokens << [i.to_s, :content] - tokens << [:close, :index] + tokens.begin_group :index + tokens.text_token i.to_s, :content + tokens.end_group :index end - assert_equal 20, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).size - assert_equal 20, tokens.token_kind_filter(:include => :blubb).size - assert_equal 30, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).size - assert_equal 30, tokens.token_kind_filter(:exclude => :index).size + assert_equal 20, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).count + assert_equal 20, tokens.token_kind_filter(:include => :blubb).count + assert_equal 30, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).count + assert_equal 30, tokens.token_kind_filter(:exclude => :index).count end end diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb index f32c967..0006d75 100644 --- a/lib/coderay/encoders/xml.rb +++ b/lib/coderay/encoders/xml.rb @@ -53,19 +53,19 @@ module Encoders end end end - - def open_token kind + + def begin_group kind @node = @node.add_element kind.to_s end - - def close_token kind + + def end_group kind if @node == @root raise 'no token to close!' end @node = @node.parent end - + end - + end end diff --git a/lib/coderay/for_redcloth.rb b/lib/coderay/for_redcloth.rb index 5149562..e439929 100644 --- a/lib/coderay/for_redcloth.rb +++ b/lib/coderay/for_redcloth.rb @@ -45,7 +45,7 @@ module CodeRay if !opts[:lang] && RedCloth::VERSION.to_s >= '4.2.0' # simulating pre-4.2 behavior if opts[:text].sub!(/\A\[(\w+)\]/, '') - if CodeRay::Scanners[$1].plugin_id == 'plaintext' + if CodeRay::Scanners[$1].plugin_id == :plaintext opts[:text] = $& + opts[:text] else opts[:lang] = $1 diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb index 165fd7f..286561d 100644 --- a/lib/coderay/scanner.rb +++ b/lib/coderay/scanner.rb @@ -61,11 +61,6 @@ module CodeRay class << self - # Returns if the Scanner can be used in streaming mode. - def streamable? - is_a? Streamable - end - def normify code code = code.to_s.dup # try using UTF-8 @@ -115,9 +110,6 @@ module CodeRay # overwrite default options here.) # * +block+ is the callback for streamed highlighting. # - # If you set :stream to +true+ in the options, the Scanner uses a - # TokenStream with the +block+ as callback to handle the tokens. - # # Else, a Tokens object is used. def initialize code='', options = {}, &block raise "I am only the basic Scanner class. I can't scan "\ @@ -129,16 +121,13 @@ module CodeRay @tokens = options[:tokens] if @options[:stream] - warn "warning in CodeRay::Scanner.new: :stream is set, "\ - "but no block was given" unless block_given? - raise NotStreamableError, self unless kind_of? Streamable - @tokens ||= TokenStream.new(&block) + raise NotImplementedError unless @tokens.is_a? Encoders::Encoder else warn "warning in CodeRay::Scanner.new: Block given, "\ "but :stream is #{@options[:stream]}" if block_given? @tokens ||= Tokens.new end - @tokens.scanner = self + @tokens.scanner = self if @tokens.respond_to? :scanner= setup end @@ -162,7 +151,7 @@ module CodeRay # Returns the Plugin ID for this scanner. def lang - self.class.plugin_id + self.class.plugin_id.to_s end # Scans the code and returns all tokens in a Tokens object. @@ -191,8 +180,6 @@ module CodeRay # Traverses the tokens. def each &block - raise ArgumentError, - 'Cannot traverse TokenStream.' if @options[:stream] tokens.each(&block) end include Enumerable @@ -246,7 +233,7 @@ module CodeRay # Resets the scanner. def reset_instance - @tokens.clear unless @options[:keep_tokens] + @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens] @cached_tokens = nil @bin_string = nil if defined? @bin_string end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index e13dc37..45ca42e 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -43,7 +43,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial label_expected = true @@ -53,9 +53,6 @@ module Scanners until eos? - kind = nil - match = nil - case state when :initial @@ -65,15 +62,14 @@ module Scanners in_preproc_line = false label_expected = label_expected_before_preproc_line end - tokens << [match, :space] - next + encoder.text_token match, :space - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment + encoder.text_token match, :comment elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) label_expected = match =~ /[;\{\}]/ @@ -81,7 +77,7 @@ module Scanners label_expected = true if match == ':' case_expected = false end - kind = :operator + encoder.text_token match, :operator elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] @@ -97,107 +93,96 @@ module Scanners end end end + encoder.text_token match, kind - elsif scan(/\$/) - kind = :ident + elsif match = scan(/\$/) + encoder.text_token match, :ident elsif match = scan(/L?"/) - tokens << [:open, :string] + encoder.begin_group :string if match[0] == ?L - tokens << ['L', :modifier] + encoder.text_token 'L', :modifier match = '"' end + encoder.text_token match, :delimiter state = :string - kind = :delimiter - elsif scan(/#[ \t]*(\w*)/) - kind = :preprocessor + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor in_preproc_line = true label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) label_expected = false - kind = :char + encoder.text_token match, :char - elsif scan(/0[xX][0-9A-Fa-f]+/) + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false - kind = :hex + encoder.text_token match, :hex - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) label_expected = false - kind = :oct + encoder.text_token match, :oct - elsif scan(/(?:\d+)(?![.eEfF])L?L?/) + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) label_expected = false - kind = :integer + encoder.text_token match, :integer - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) label_expected = false - kind = :float + encoder.text_token match, :float else - getch - kind = :error + encoder.text_token getch, :error end when :string - if scan(/[^\\\n"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] + if match = scan(/[^\\\n"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string state = :initial label_expected = false - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - tokens << [:close, :string] - kind = :error + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error state = :initial label_expected = false else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include state = :initial elsif match = scan(/\s+/) - kind = :space + encoder.text_token match, :space state = :initial if match.index ?\n else state = :initial - next end else - raise_inspect 'Unknown state', tokens + raise_inspect 'Unknown state', encoder end - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end if state == :string - tokens << [:close, :string] + encoder.end_group :string end - tokens + encoder end end diff --git a/lib/coderay/scanners/cpp.rb b/lib/coderay/scanners/cpp.rb index eba1bd2..7531892 100644 --- a/lib/coderay/scanners/cpp.rb +++ b/lib/coderay/scanners/cpp.rb @@ -53,7 +53,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial label_expected = true @@ -63,9 +63,6 @@ module Scanners until eos? - kind = nil - match = nil - case state when :initial @@ -75,15 +72,14 @@ module Scanners in_preproc_line = false label_expected = label_expected_before_preproc_line end - tokens << [match, :space] - next + encoder.text_token match, :space - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment + encoder.text_token match, :comment elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) label_expected = match =~ /[;\{\}]/ @@ -91,7 +87,7 @@ module Scanners label_expected = true if match == ':' case_expected = false end - kind = :operator + encoder.text_token match, :operator elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] @@ -109,122 +105,110 @@ module Scanners end end end + encoder.text_token match, kind - elsif scan(/\$/) - kind = :ident + elsif match = scan(/\$/) + encoder.text_token match, :ident elsif match = scan(/L?"/) - tokens << [:open, :string] + encoder.begin_group :string if match[0] == ?L - tokens << ['L', :modifier] + encoder.text_token match, 'L', :modifier match = '"' end state = :string - kind = :delimiter + encoder.text_token match, :delimiter - elsif scan(/#[ \t]*(\w*)/) - kind = :preprocessor + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor in_preproc_line = true label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) label_expected = false - kind = :char + encoder.text_token match, :char - elsif scan(/0[xX][0-9A-Fa-f]+/) + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false - kind = :hex + encoder.text_token match, :hex - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) label_expected = false - kind = :oct + encoder.text_token match, :oct - elsif scan(/(?:\d+)(?![.eEfF])L?L?/) + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) label_expected = false - kind = :integer + encoder.text_token match, :integer - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) label_expected = false - kind = :float + encoder.text_token match, :float else - getch - kind = :error + encoder.text_token getch, :error end when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] + if match = scan(/[^\\"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string state = :initial label_expected = false - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - tokens << [:close, :string] - kind = :error + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error state = :initial label_expected = false else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include state = :initial elsif match = scan(/\s+/) - kind = :space + encoder.text_token match, :space state = :initial if match.index ?\n else state = :initial - next end when :class_name_expected - if scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = :class + if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + encoder.text_token match, :class state = :initial elsif match = scan(/\s+/) - kind = :space + encoder.text_token match, :space else - getch - kind = :error + encoder.text_token getch, :error state = :initial end else - raise_inspect 'Unknown state', tokens - - end + raise_inspect 'Unknown state', encoder - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] end if state == :string - tokens << [:close, :string] + encoder.end_group :string end - tokens + encoder end end diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 75cd056..b3f116e 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -51,129 +51,123 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options value_expected = nil states = [:initial] until eos? - kind = nil - match = nil - - if scan(/\s+/) - kind = :space + if match = scan(/\s+/) + encoder.text_token match, :space elsif case states.last when :initial, :media - if scan(/(?>#{RE::Ident})(?!\()|\*/ox) - kind = :type - elsif scan RE::Class - kind = :class - elsif scan RE::Id - kind = :constant - elsif scan RE::PseudoClass - kind = :pseudo_class + if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox) + encoder.text_token match, :type + elsif match = scan(RE::Class) + encoder.text_token match, :class + elsif match = scan(RE::Id) + encoder.text_token match, :constant + elsif match = scan(RE::PseudoClass) + encoder.text_token match, :pseudo_class elsif match = scan(RE::AttributeSelector) # TODO: Improve highlighting inside of attribute selectors. - tokens << [match[0,1], :operator] - tokens << [match[1..-2], :attribute_name] if match.size > 2 - tokens << [match[-1,1], :operator] if match[-1] == ?] - next + encoder.text_token match[0,1], :operator + encoder.text_token match[1..-2], :attribute_name if match.size > 2 + encoder.text_token match[-1,1], :operator if match[-1] == ?] elsif match = scan(/@media/) - kind = :directive + encoder.text_token match, :directive states.push :media_before_name end when :block - if scan(/(?>#{RE::Ident})(?!\()/ox) + if match = scan(/(?>#{RE::Ident})(?!\()/ox) if value_expected - kind = :value + encoder.text_token match, :value else - kind = :key + encoder.text_token match, :key end end when :media_before_name - if scan RE::Ident - kind = :type + if match = scan(RE::Ident) + encoder.text_token match, :type states[-1] = :media_after_name end when :media_after_name - if scan(/\{/) - kind = :operator + if match = scan(/\{/) + encoder.text_token match, :operator states[-1] = :media end when :comment - if scan(/(?:[^*\s]|\*(?!\/))+/) - kind = :comment - elsif scan(/\*\//) - kind = :comment + if match = scan(/(?:[^*\s]|\*(?!\/))+/) + encoder.text_token match, :comment + elsif match = scan(/\*\//) + encoder.text_token match, :comment states.pop - elsif scan(/\s+/) - kind = :space + elsif match = scan(/\s+/) + encoder.text_token match, :space end else - raise_inspect 'Unknown state', tokens + raise_inspect 'Unknown state', encoder end - elsif scan(/\/\*/) - kind = :comment + elsif match = scan(/\/\*/) + encoder.text_token match, :comment states.push :comment - elsif scan(/\{/) + elsif match = scan(/\{/) value_expected = false - kind = :operator + encoder.text_token match, :operator states.push :block - elsif scan(/\}/) + elsif match = scan(/\}/) value_expected = false if states.last == :block || states.last == :media - kind = :operator + encoder.text_token match, :operator states.pop else - kind = :error + encoder.text_token match, :error end elsif match = scan(/#{RE::String}/o) - tokens << [:open, :string] - tokens << [match[0, 1], :delimiter] - tokens << [match[1..-2], :content] if match.size > 2 - tokens << [match[-1, 1], :delimiter] if match.size >= 2 - tokens << [:close, :string] - next + encoder.begin_group :string + encoder.text_token match[0, 1], :delimiter + encoder.text_token match[1..-2], :content if match.size > 2 + encoder.text_token match[-1, 1], :delimiter if match.size >= 2 + encoder.end_group :string elsif match = scan(/#{RE::Function}/o) - tokens << [:open, :string] + encoder.begin_group :string start = match[/^\w+\(/] - tokens << [start, :delimiter] + encoder.text_token start, :delimiter if match[-1] == ?) - tokens << [match[start.size..-2], :content] - tokens << [')', :delimiter] + encoder.text_token match[start.size..-2], :content + encoder.text_token ')', :delimiter else - tokens << [match[start.size..-1], :content] + encoder.text_token match[start.size..-1], :content end - tokens << [:close, :string] - next + encoder.end_group :string - elsif scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox) - kind = :float + elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox) + encoder.text_token match, :float - elsif scan(/#{RE::Color}/o) - kind = :color + elsif match = scan(/#{RE::Color}/o) + encoder.text_token match, :color - elsif scan(/! *important/) - kind = :important + elsif match = scan(/! *important/) + encoder.text_token match, :important - elsif scan(/(?:rgb|hsl)a?\([^()\n]*\)?/) - kind = :color + elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/) + encoder.text_token match, :color - elsif scan(/#{RE::AtKeyword}/o) - kind = :directive + elsif match = scan(RE::AtKeyword) + encoder.text_token match, :directive elsif match = scan(/ [+>:;,.=()\/] /x) if match == ':' @@ -181,26 +175,16 @@ module Scanners elsif match == ';' value_expected = false end - kind = :operator + encoder.text_token match, :operator else - getch - kind = :error - - end + encoder.text_token getch, :error - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] end - tokens + encoder end end diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index e33bff2..0f2b89f 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -14,67 +14,52 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options opened_tokens = [] until eos? - kind = nil - match = nil - - if scan(/\s+/) - tokens << [matched, :space] - next - - elsif scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) - kind = self[1].to_sym - match = self[2].gsub(/\\(.)/, '\1') - unless Tokens::AbbreviationForKind.has_key? kind - kind = :error - match = matched - end - - elsif scan(/ (\w+) ([<\[]) /x) - kind = self[1].to_sym - opened_tokens << kind - case self[2] - when '<' - match = :open - when '[' - match = :begin_line - else - raise - end - - elsif !opened_tokens.empty? && scan(/ > /x) - kind = opened_tokens.pop - match = :close - - elsif !opened_tokens.empty? && scan(/ \] /x) - kind = opened_tokens.pop - match = :end_line - + if match = scan(/\s+/) + encoder.text_token match, :space + + elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) + kind = self[1].to_sym + match = self[2].gsub(/\\(.)/, '\1') + unless Tokens::AbbreviationForKind.has_key? kind + kind = :error + match = matched + end + encoder.text_token match, kind + + elsif match = scan(/ (\w+) ([<\[]) /x) + kind = self[1].to_sym + opened_tokens << kind + case self[2] + when '<' + encoder.begin_group kind + when '[' + encoder.begin_line kind else - kind = :space - getch - + raise 'CodeRay bug: This case should not be reached.' end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens + + elsif !opened_tokens.empty? && match = scan(/ > /x) + encoder.end_group opened_tokens.pop + + elsif !opened_tokens.empty? && match = scan(/ \] /x) + encoder.end_line opened_tokens.pop + + else + encoder.text_token getch, :space + end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] end - tokens << [:close, opened_tokens.pop] until opened_tokens.empty? + encoder.end_group opened_tokens.pop until opened_tokens.empty? - tokens + encoder end end @@ -111,14 +96,14 @@ method([])] TEST_OUTPUT = CodeRay::Tokens[ ['10', :integer], ['(\\)', :operator], - [:open, :string], + [:begin_group, :string], ['test', :content], - [:close, :string], + [:end_group, :string], [:begin_line, :test], ["\n\n \t \n", :space], ["[]", :method], [:end_line, :test], - ] + ].flatten def test_filtering_text_tokens assert_equal TEST_OUTPUT, CodeRay::Scanners::Debug.new.tokenize(TEST_INPUT) diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index 170f250..e0f4ea1 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -42,110 +42,100 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial last_token = '' - + until eos? - - kind = nil - match = nil - + if state == :initial - if scan(/ \s+ /x) - tokens << [matched, :space] + if match = scan(/ \s+ /x) + encoder.text_token match, :space next - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - tokens << [matched, :preprocessor] + elsif match = scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + encoder.text_token match, :preprocessor next - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - tokens << [matched, :comment] + elsif match = scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + encoder.text_token match, :comment next elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x) - kind = :operator + encoder.text_token match, :operator elsif match = scan(/\./) - kind = :operator - if last_token == 'end' - tokens << [match, kind] - next - end + encoder.text_token match, :operator + next if last_token == 'end' elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match] + encoder.text_token match, NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match] - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] + elsif match = skip(/ ' ( [^\n']|'' ) (?:'|$) /x) + encoder.begin_group :char + encoder.text_token "'", :delimiter + encoder.text_token self[1], :content + encoder.text_token "'", :delimiter + encoder.end_group :char next elsif match = scan(/ ' /x) - tokens << [:open, :string] + encoder.begin_group :string + encoder.text_token match, :delimiter state = :string - kind = :delimiter - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char + elsif match = scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + encoder.text_token match, :char - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex + elsif match = scan(/ \$ [0-9A-Fa-f]+ /x) + encoder.text_token match, :hex - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer + elsif match = scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + encoder.text_token match, :integer + + elsif match = scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + encoder.text_token match, :float - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float - else - kind = :error - getch - + encoder.text_token getch, :error + next + end elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] + if match = scan(/[^\n']+/) + encoder.text_token match, :content + elsif match = scan(/''/) + encoder.text_token match, :char + elsif match = scan(/'/) + encoder.text_token match, :delimiter + encoder.end_group :string state = :initial next - elsif scan(/\n/) - tokens << [:close, :string] - kind = :error + elsif match = scan(/\n/) + encoder.end_group :string + encoder.text_token match, :space state = :initial else - raise "else case \' reached; %p not handled." % peek(1), tokens + raise "else case \' reached; %p not handled." % peek(1), encoder end else - raise 'else-case reached', tokens + raise 'else-case reached', encoder end - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens unless match - last_token = match - tokens << [match, kind] end - tokens + if state == :string + encoder.end_group state + end + + encoder end end diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index 4f3ff2e..417985a 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -13,7 +13,7 @@ module Scanners require 'coderay/helpers/file_type' - def scan_tokens tokens, options + def scan_tokens encoder, options line_kind = nil state = :initial @@ -21,14 +21,13 @@ module Scanners content_lang = nil until eos? - kind = match = nil if match = scan(/\n/) if line_kind - tokens << [:end_line, line_kind] + encoder.end_line line_kind line_kind = nil end - tokens << [match, :space] + encoder.text_token match, :space next end @@ -36,89 +35,82 @@ module Scanners when :initial if match = scan(/--- |\+\+\+ |=+|_+/) - tokens << [:begin_line, line_kind = :head] - tokens << [match, :head] - if filename = scan(/.*?(?=$|[\t\n\x00]| \(revision)/) - tokens << [filename, :filename] - content_lang = FileType.fetch filename, :plaintext + encoder.begin_line line_kind = :head + encoder.text_token match, :head + if match = scan(/.*?(?=$|[\t\n\x00]| \(revision)/) + encoder.text_token match, :filename + content_lang = FileType.fetch match, :plaintext end next unless match = scan(/.+/) - kind = :plain + encoder.text_token match, :plain elsif match = scan(/Index: |Property changes on: /) - tokens << [:begin_line, line_kind = :head] - tokens << [match, :head] + encoder.begin_line line_kind = :head + encoder.text_token match, :head next unless match = scan(/.+/) - kind = :plain + encoder.text_token match, :plain elsif match = scan(/Added: /) - tokens << [:begin_line, line_kind = :head] - tokens << [match, :head] + encoder.begin_line line_kind = :head + encoder.text_token match, :head next unless match = scan(/.+/) - kind = :plain + encoder.text_token match, :plain state = :added elsif match = scan(/\\ /) - tokens << [:begin_line, line_kind = :change] - tokens << [match, :change] + encoder.begin_line line_kind = :change + encoder.text_token match, :change next unless match = scan(/.+/) - kind = :plain + encoder.text_token match, :plain elsif match = scan(/@@(?>[^@\n]*)@@/) if check(/\n|$/) - tokens << [:begin_line, line_kind = :change] + encoder.begin_line line_kind = :change else - tokens << [:open, :change] + encoder.begin_group :change end - tokens << [match[0,2], :change] - tokens << [match[2...-2], :plain] if match.size > 4 - tokens << [match[-2,2], :change] - tokens << [:close, :change] unless line_kind - next unless code = scan(/.+/) - CodeRay.scan code, content_lang, :tokens => tokens + encoder.text_token match[0,2], :change + encoder.text_token match[2...-2], :plain if match.size > 4 + encoder.text_token match[-2,2], :change + encoder.end_group :change unless line_kind + next unless match = scan(/.+/) + CodeRay.scan match, content_lang, :tokens => encoder next elsif match = scan(/\+/) - tokens << [:begin_line, line_kind = :insert] - tokens << [match, :insert] + encoder.begin_line line_kind = :insert + encoder.text_token match, :insert next unless match = scan(/.+/) - CodeRay.scan match, content_lang, :tokens => tokens + CodeRay.scan match, content_lang, :tokens => encoder next elsif match = scan(/-/) - tokens << [:begin_line, line_kind = :delete] - tokens << [match, :delete] - next unless code = scan(/.+/) - CodeRay.scan code, content_lang, :tokens => tokens + encoder.begin_line line_kind = :delete + encoder.text_token match, :delete + next unless match = scan(/.+/) + CodeRay.scan match, content_lang, :tokens => encoder next - elsif code = scan(/ .*/) - CodeRay.scan code, content_lang, :tokens => tokens + elsif match = scan(/ .*/) + CodeRay.scan match, content_lang, :tokens => encoder next - elsif scan(/.+/) - tokens << [:begin_line, line_kind = :comment] - kind = :plain + elsif match = scan(/.+/) + encoder.begin_line line_kind = :comment + encoder.text_token match, :plain else raise_inspect 'else case rached' end when :added if match = scan(/ \+/) - tokens << [:begin_line, line_kind = :insert] - tokens << [match, :insert] + encoder.begin_line line_kind = :insert + encoder.text_token match, :insert next unless match = scan(/.+/) - kind = :plain + encoder.text_token match, :plain else state = :initial next end end - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] end - tokens << [:end_line, line_kind] if line_kind - tokens + encoder.end_line line_kind if line_kind + + encoder end end diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb index fd7fbd9..fdbbbc7 100644 --- a/lib/coderay/scanners/groovy.rb +++ b/lib/coderay/scanners/groovy.rb @@ -1,11 +1,11 @@ module CodeRay module Scanners - + load :java # Scanner for Groovy. class Groovy < Java - + include Streamable register_for :groovy @@ -37,7 +37,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial inline_block_stack = [] @@ -45,35 +45,32 @@ module Scanners string_delimiter = nil import_clause = class_name_follows = last_token = after_def = false value_expected = true - + until eos? - - kind = nil - match = nil case state - + when :initial - + if match = scan(/ \s+ | \\\n /x) - tokens << [match, :space] + encoder.text_token match, :space if match.index ?\n import_clause = after_def = false value_expected = true unless value_expected end next - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) value_expected = true after_def = false - kind = :comment + encoder.text_token match, :comment - elsif bol? && scan(/ \#!.* /x) - kind = :doctype + elsif bol? && match = scan(/ \#!.* /x) + encoder.text_token match, :doctype - elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox) + elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox) after_def = value_expected = false - kind = :include + encoder.text_token match, :include elsif match = scan(/ #{IDENT} | \[\] /ox) kind = IDENT_KIND[match] @@ -93,16 +90,17 @@ module Scanners import_clause = match == 'import' after_def = true if match == 'def' end + encoder.text_token match, kind - elsif scan(/;/) + elsif match = scan(/;/) import_clause = after_def = false value_expected = true - kind = :operator + encoder.text_token match, :operator - elsif scan(/\{/) + elsif match = scan(/\{/) class_name_follows = after_def = false value_expected = true - kind = :operator + encoder.text_token match, :operator if !inline_block_stack.empty? inline_block_paren_depth += 1 end @@ -113,155 +111,146 @@ module Scanners value_expected = true value_expected = :regexp if match == '~' after_def = false - kind = :operator + encoder.text_token match, :operator elsif match = scan(/ [)\]}] /x) value_expected = after_def = false if !inline_block_stack.empty? && match == '}' inline_block_paren_depth -= 1 if inline_block_paren_depth == 0 # closing brace of inline block reached - tokens << [match, :inline_delimiter] - tokens << [:close, :inline] + encoder.text_token match, :inline_delimiter + encoder.end_group :inline state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop next end end - kind = :operator + encoder.text_token match, :operator elsif check(/[\d.]/) after_def = value_expected = false - if scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - elsif scan(/(?>0[0-7]+)(?![89.eEfF])/) - kind = :oct - elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/) - kind = :float - elsif scan(/\d+[lLgG]?/) - kind = :integer + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :oct + elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/) + encoder.text_token match, :float + elsif match = scan(/\d+[lLgG]?/) + encoder.text_token match, :integer end - + elsif match = scan(/'''|"""/) after_def = value_expected = false state = :multiline_string - tokens << [:open, :string] + encoder.begin_group :string string_delimiter = match - kind = :delimiter - + encoder.text_token match, :delimiter + # TODO: record.'name' syntax elsif match = scan(/["']/) after_def = value_expected = false state = match == '/' ? :regexp : :string - tokens << [:open, state] + encoder.begin_group state string_delimiter = match - kind = :delimiter - - elsif value_expected && (match = scan(/\//)) + encoder.text_token match, :delimiter + + elsif value_expected && match = scan(/\//) after_def = value_expected = false - tokens << [:open, :regexp] + encoder.begin_group :regexp state = :regexp string_delimiter = '/' - kind = :delimiter - - elsif scan(/ @ #{IDENT} /ox) + encoder.text_token match, :delimiter + + elsif match = scan(/ @ #{IDENT} /ox) after_def = value_expected = false - kind = :annotation - - elsif scan(/\//) + encoder.text_token match, :annotation + + elsif match = scan(/\//) after_def = false value_expected = true - kind = :operator - + encoder.text_token match, :operator + else - getch - kind = :error - + encoder.text_token getch, :error + end - + when :string, :regexp, :multiline_string - if scan(STRING_CONTENT_PATTERN[string_delimiter]) - kind = :content + if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + encoder.text_token match, :content elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/) - tokens << [match, :delimiter] + encoder.text_token match, :delimiter if state == :regexp # TODO: regexp modifiers? s, m, x, i? modifiers = scan(/[ix]+/) - tokens << [modifiers, :modifier] if modifiers && !modifiers.empty? + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? end state = :string if state == :multiline_string - tokens << [:close, state] + encoder.end_group state string_delimiter = nil after_def = value_expected = false state = :initial next - + elsif (state == :string || state == :multiline_string) && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'") - kind = :content + encoder.text_token match, :content else - kind = :char + encoder.text_token match, :char end - elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - + elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \$ #{IDENT} /mox) - tokens << [:open, :inline] - tokens << ['$', :inline_delimiter] + encoder.begin_group :inline + encoder.text_token '$', :inline_delimiter match = match[1..-1] - tokens << [match, IDENT_KIND[match]] - tokens << [:close, :inline] + encoder.text_token match, IDENT_KIND[match] + encoder.end_group :inline next elsif match = scan(/ \$ \{ /x) - tokens << [:open, :inline] - tokens << ['${', :inline_delimiter] + encoder.begin_group :inline + encoder.text_token match, :inline_delimiter inline_block_stack << [state, string_delimiter, inline_block_paren_depth] inline_block_paren_depth = 1 state = :initial next - - elsif scan(/ \$ /mx) - kind = :content - - elsif scan(/ \\. /mx) - kind = :content - - elsif scan(/ \\ | \n /x) - tokens << [:close, state] - kind = :error + + elsif match = scan(/ \$ /mx) + encoder.text_token match, :content + + elsif match = scan(/ \\. /mx) + encoder.text_token match, :content # FIXME: Shouldn't this be :error? + + elsif match = scan(/ \\ | \n /x) + encoder.end_group state + encoder.text_token match, :error after_def = value_expected = false state = :initial - + else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end - + else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens + raise_inspect 'Unknown state', encoder + end - raise_inspect 'Empty token', tokens unless match last_token = match unless [:space, :comment, :doctype].include? kind - tokens << [match, kind] - end - + if [:multiline_string, :string, :regexp].include? state - tokens << [:close, state] + encoder.end_group state end - - tokens + + encoder end - + end - + end end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 52c7520..8f71e0e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -53,135 +53,125 @@ module Scanners @state = :initial @plain_string_content = nil end - - def scan_tokens tokens, options - + + def scan_tokens encoder, options + state = @state plain_string_content = @plain_string_content - + until eos? - - kind = nil - match = nil - - if scan(/\s+/m) - kind = :space - + + if match = scan(/\s+/m) + encoder.text_token match, :space + else - + case state - + when :initial - if scan(/<!--.*?-->/m) - kind = :comment - elsif scan(/<!DOCTYPE.*?>/m) - kind = :doctype - elsif scan(/<\?xml.*?\?>/m) - kind = :preprocessor - elsif scan(/<\?.*?\?>|<%.*?%>/m) - kind = :comment - elsif scan(/<\/[-\w.:]*>/m) - kind = :tag + if match = scan(/<!--.*?-->/m) + encoder.text_token match, :comment + elsif match = scan(/<!DOCTYPE.*?>/m) + encoder.text_token match, :doctype + elsif match = scan(/<\?xml.*?\?>/m) + encoder.text_token match, :preprocessor + elsif match = scan(/<\?.*?\?>|<%.*?%>/m) + encoder.text_token match, :comment + elsif match = scan(/<\/[-\w.:]*>/m) + encoder.text_token match, :tag elsif match = scan(/<[-\w.:]+>?/m) - kind = :tag + encoder.text_token match, :tag state = :attribute unless match[-1] == ?> - elsif scan(/[^<>&]+/) - kind = :plain - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[<>&]/) - kind = :error + elsif match = scan(/[^<>&]+/) + encoder.text_token match, :plain + elsif match = scan(/#{ENTITY}/ox) + encoder.text_token match, :entity + elsif match = scan(/[<>&]/) + encoder.text_token match, :error else - raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + raise_inspect '[BUG] else-case reached with state %p' % [state], encoder end - + when :attribute - if scan(/#{TAG_END}/) - kind = :tag + if match = scan(/#{TAG_END}/) + encoder.text_token match, :tag state = :initial - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name + elsif match = scan(/#{ATTR_NAME}/o) + encoder.text_token match, :attribute_name state = :attribute_equal else - kind = :error - getch + encoder.text_token getch, :error end - + when :attribute_equal - if scan(/=/) - kind = :operator + if match = scan(/=/) + encoder.text_token match, :operator state = :attribute_value - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - elsif scan(/#{TAG_END}/o) - kind = :tag + elsif match = scan(/#{ATTR_NAME}/o) + encoder.text_token match, :attribute_name + elsif match = scan(/#{TAG_END}/o) + encoder.text_token match, :tag state = :initial - elsif scan(/./) - kind = :error + else + encoder.text_token getch, :error state = :attribute end - + when :attribute_value - if scan(/#{ATTR_NAME}/o) - kind = :attribute_value + if match = scan(/#{ATTR_NAME}/o) + encoder.text_token match, :attribute_value state = :attribute elsif match = scan(/["']/) - tokens << [:open, :string] + encoder.begin_group :string state = :attribute_value_string plain_string_content = PLAIN_STRING_CONTENT[match] - kind = :delimiter + encoder.text_token match, :delimiter elsif scan(/#{TAG_END}/o) - kind = :tag + encoder.text_token match, :tag state = :initial else - kind = :error - getch + encoder.text_token getch, :error end - + when :attribute_value_string - if scan(plain_string_content) - kind = :content - elsif scan(/['"]/) - tokens << [matched, :delimiter] - tokens << [:close, :string] + if match = scan(plain_string_content) + encoder.text_token match, :content + elsif match = scan(/['"]/) + encoder.text_token match, :delimiter + encoder.end_group :string state = :attribute - next - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/&/) - kind = :content - elsif scan(/[\n>]/) - tokens << [:close, :string] - kind = :error + elsif match = scan(/#{ENTITY}/ox) + encoder.text_token match, :entity + elsif match = scan(/&/) + encoder.text_token match, :content + elsif match = scan(/[\n>]/) + encoder.end_group :string state = :initial + encoder.text_token match, :error end - + else - raise_inspect 'Unknown state: %p' % [state], tokens - + raise_inspect 'Unknown state: %p' % [state], encoder + end - + end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] + end - + if options[:keep_state] @state = state @plain_string_content = plain_string_content + else + if state == :attribute_value_string + encoder.end_group :string + end end - - tokens + + encoder end - + end - + end end diff --git a/lib/coderay/scanners/java.rb b/lib/coderay/scanners/java.rb index e4a7421..e7becda 100644 --- a/lib/coderay/scanners/java.rb +++ b/lib/coderay/scanners/java.rb @@ -48,7 +48,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial string_delimiter = nil @@ -58,23 +58,20 @@ module Scanners until eos? - kind = nil - match = nil - case state when :initial if match = scan(/ \s+ | \\\n /x) - tokens << [match, :space] + encoder.text_token match, :space next elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - tokens << [match, :comment] + encoder.text_token match, :comment next - elsif package_name_expected && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox) - kind = package_name_expected + elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox) + encoder.text_token match, package_name_expected elsif match = scan(/ #{IDENT} | \[\] /ox) kind = IDENT_KIND[match] @@ -93,92 +90,82 @@ module Scanners class_name_follows = true end end + encoder.text_token match, kind - elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x) - kind = :operator + elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x) + encoder.text_token match, :operator - elsif scan(/;/) + elsif match = scan(/;/) package_name_expected = false - kind = :operator + encoder.text_token match, :operator - elsif scan(/\{/) + elsif match = scan(/\{/) class_name_follows = false - kind = :operator + encoder.text_token match, :operator elsif check(/[\d.]/) - if scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - elsif scan(/(?>0[0-7]+)(?![89.eEfF])/) - kind = :oct - elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/) - kind = :float - elsif scan(/\d+[lL]?/) - kind = :integer + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :oct + elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/) + encoder.text_token match, :float + elsif match = scan(/\d+[lL]?/) + encoder.text_token match, :integer end elsif match = scan(/["']/) - tokens << [:open, :string] state = :string + encoder.begin_group state string_delimiter = match - kind = :delimiter + encoder.text_token match, :delimiter - elsif scan(/ @ #{IDENT} /ox) - kind = :annotation + elsif match = scan(/ @ #{IDENT} /ox) + encoder.text_token match, :annotation else - getch - kind = :error + encoder.text_token getch, :error end when :string - if scan(STRING_CONTENT_PATTERN[string_delimiter]) - kind = :content + if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + encoder.text_token match, :content elsif match = scan(/["'\/]/) - tokens << [match, :delimiter] - tokens << [:close, state] - string_delimiter = nil + encoder.text_token match, :delimiter + encoder.end_group state state = :initial - next + string_delimiter = nil elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") - kind = :content + encoder.text_token match, :content else - kind = :char + encoder.text_token match, :char end - elsif scan(/\\./m) - kind = :content - elsif scan(/ \\ | $ /x) - tokens << [:close, state] - kind = :error + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group state state = :initial + encoder.text_token match, :error else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end else - raise_inspect 'Unknown state', tokens + raise_inspect 'Unknown state', encoder end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match last_token_dot = match == '.' - tokens << [match, kind] - end if state == :string - tokens << [:close, state] + encoder.end_group state end - tokens + encoder end end diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb index 92ac005..3ae8d80 100644 --- a/lib/coderay/scanners/java_script.rb +++ b/lib/coderay/scanners/java_script.rb @@ -5,12 +5,12 @@ module Scanners # # Aliases: +ecmascript+, +ecma_script+, +javascript+ class JavaScript < Scanner - + include Streamable - + register_for :java_script file_extension 'js' - + # The actual JavaScript keywords. KEYWORDS = %w[ break case catch continue default delete do else @@ -40,7 +40,7 @@ module Scanners add(PREDEFINED_CONSTANTS, :pre_constant). add(MAGIC_VARIABLES, :local_variable). add(KEYWORDS, :keyword) # :nodoc: - + ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc: @@ -56,47 +56,43 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial string_delimiter = nil value_expected = true key_expected = false function_expected = false - + until eos? - - kind = nil - match = nil case state - + when :initial - + if match = scan(/ \s+ | \\\n /x) value_expected = true if !value_expected && match.index(?\n) - tokens << [match, :space] - next - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) value_expected = true - kind = :comment - + encoder.text_token match, :comment + elsif check(/\.?\d/) key_expected = value_expected = false - if scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - elsif scan(/(?>0[0-7]+)(?![89.eEfF])/) - kind = :oct - elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - elsif scan(/\d+/) - kind = :integer + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :oct + elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + encoder.text_token match, :float + elsif match = scan(/\d+/) + encoder.text_token match, :integer end - + elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim) # FIXME: scan over nested tags - xml_scanner.tokenize match + xml_scanner.tokenize match, :tokens => encoder value_expected = false next @@ -105,12 +101,12 @@ module Scanners last_operator = match[-1] key_expected = (last_operator == ?{) || (last_operator == ?,) function_expected = false - kind = :operator - - elsif scan(/ [)\]}]+ /x) + encoder.text_token match, :operator + + elsif match = scan(/ [)\]}]+ /x) function_expected = key_expected = value_expected = false - kind = :operator - + encoder.text_token match, :operator + elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) kind = IDENT_KIND[match] value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] @@ -128,101 +124,91 @@ module Scanners end function_expected = (kind == :keyword) && (match == 'function') key_expected = false - + encoder.text_token match, kind + elsif match = scan(/["']/) if key_expected && check(KEY_CHECK_PATTERN[match]) state = :key else state = :string end - tokens << [:open, state] + encoder.begin_group state string_delimiter = match - kind = :delimiter - + encoder.text_token match, :delimiter + elsif value_expected && (match = scan(/\/(?=\S)/)) - tokens << [:open, :regexp] + encoder.begin_group :regexp state = :regexp string_delimiter = '/' - kind = :delimiter - - elsif scan(/ \/ /x) + encoder.text_token match, :delimiter + + elsif match = scan(/ \/ /x) value_expected = true key_expected = false - kind = :operator - + encoder.text_token match, :operator + else - getch - kind = :error - + encoder.text_token getch, :error + end - + when :string, :regexp, :key - if scan(STRING_CONTENT_PATTERN[string_delimiter]) - kind = :content + if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + encoder.text_token match, :content elsif match = scan(/["'\/]/) - tokens << [match, :delimiter] + encoder.text_token match, :delimiter if state == :regexp modifiers = scan(/[gim]+/) - tokens << [modifiers, :modifier] if modifiers && !modifiers.empty? + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? end - tokens << [:close, state] + encoder.end_group state string_delimiter = nil key_expected = value_expected = false state = :initial - next elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") - kind = :content + encoder.text_token match, :content else - kind = :char + encoder.text_token match, :char end - elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/\\./m) - kind = :content - elsif scan(/ \\ | $ /x) - tokens << [:close, state] - kind = :error + elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group state + encoder.text_token match, :error key_expected = value_expected = false state = :initial else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end - + else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens + raise_inspect 'Unknown state', encoder + end - raise_inspect 'Empty token', tokens unless match - tokens << [match, kind] - end - + if [:string, :regexp].include? state - tokens << [:close, state] + encoder.end_group state end - - tokens + + encoder end - + protected - + def reset_instance super @xml_scanner.reset if defined? @xml_scanner end - + def xml_scanner @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false end - + end end diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb index ca74ff3..668fd82 100644 --- a/lib/coderay/scanners/json.rb +++ b/lib/coderay/scanners/json.rb @@ -19,7 +19,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial stack = [] @@ -27,82 +27,67 @@ module Scanners until eos? - kind = nil - match = nil - case state when :initial - if match = scan(/ \s+ | \\\n /x) - tokens << [match, :space] - next + if match = scan(/ \s+ /x) + encoder.text_token match, :space + elsif match = scan(/"/) + state = key_expected ? :key : :string + encoder.begin_group state + encoder.text_token match, :delimiter elsif match = scan(/ [:,\[{\]}] /x) - kind = :operator + encoder.text_token match, :operator case match - when '{' then stack << :object; key_expected = true - when '[' then stack << :array when ':' then key_expected = false when ',' then key_expected = true if stack.last == :object + when '{' then stack << :object; key_expected = true + when '[' then stack << :array when '}', ']' then stack.pop # no error recovery, but works for valid JSON end elsif match = scan(/ true | false | null /x) - kind = :value - elsif match = scan(/-?(?:0|[1-9]\d*)/) + encoder.text_token match, :value + elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x) kind = :integer - if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/) + if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x) match << matched kind = :float end - elsif match = scan(/"/) - state = key_expected ? :key : :string - tokens << [:open, state] - kind = :delimiter + encoder.text_token match, kind else - getch - kind = :error + encoder.text_token getch, :error end when :string, :key - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, state] + if match = scan(/[^\\"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group state state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/\\./m) - kind = :content - elsif scan(/ \\ | $ /x) - tokens << [:close, state] - kind = :error + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group state + encoder.text_token match, :error state = :initial else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end else - raise_inspect 'Unknown state', tokens + raise_inspect 'Unknown state', encoder end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end if [:string, :key].include? state - tokens << [:close, state] + encoder.end_group state end - tokens + encoder end end diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb index fe6b303..ba8ee71 100644 --- a/lib/coderay/scanners/nitro_xhtml.rb +++ b/lib/coderay/scanners/nitro_xhtml.rb @@ -1,14 +1,14 @@ module CodeRay module Scanners - + load :html load :ruby - + # Nitro XHTML Scanner # # Alias: +nitro+ class NitroXHTML < Scanner - + include Streamable register_for :nitro_xhtml file_extension :xhtml @@ -38,7 +38,7 @@ module Scanners ) (?: %> )? /mx # :nodoc: - + NITRO_VALUE_BLOCK = / \# (?: @@ -55,83 +55,83 @@ module Scanners | \\ [^\\]* \\? ) /x # :nodoc: - + NITRO_ENTITY = / % (?: \#\d+ | \w+ ) ; / # :nodoc: - + START_OF_RUBY = / (?=[<\#%]) < (?: \?r | % | ruby> ) | \# [{(|] | % (?: \#\d+ | \w+ ) ; /x # :nodoc: - + CLOSING_PAREN = Hash.new { |h, p| h[p] = p } # :nodoc: CLOSING_PAREN.update( { '(' => ')', '[' => ']', '{' => '}', } ) - + protected - + def setup @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true end - + def reset_instance super @html_scanner.reset end - - def scan_tokens tokens, options - + + def scan_tokens encoder, options + until eos? - - if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || match = scan_until(/\z/)) and not match.empty? @html_scanner.tokenize match - + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) start_tag = match[0,2] delimiter = CLOSING_PAREN[start_tag[1,1]] end_tag = match[-1,1] == delimiter ? delimiter : '' - tokens << [:open, :inline] - tokens << [start_tag, :inline_delimiter] + encoder.begin_group :inline + encoder.text_token start_tag, :inline_delimiter code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :inline_delimiter] unless end_tag.empty? - tokens << [:close, :inline] - + @ruby_scanner.tokenize code, :tokens => encoder + encoder.text_token end_tag, :inline_delimiter unless end_tag.empty? + encoder.end_group :inline + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) start_tag = '<?r' end_tag = match[-2,2] == '?>' ? '?>' : '' - tokens << [:open, :inline] - tokens << [start_tag, :inline_delimiter] + encoder.begin_group :inline + encoder.text_token start_tag, :inline_delimiter code = match[start_tag.size .. -(end_tag.size)-1] - @ruby_scanner.tokenize code - tokens << [end_tag, :inline_delimiter] unless end_tag.empty? - tokens << [:close, :inline] - + @ruby_scanner.tokenize code, :tokens => encoder + encoder.text_token end_tag, :inline_delimiter unless end_tag.empty? + encoder.end_group :inline + elsif entity = scan(/#{NITRO_ENTITY}/o) - tokens << [entity, :entity] - + encoder.text_token entity, :entity + elsif scan(/%/) - tokens << [matched, :error] - + encoder.text_token matched, :error + else - raise_inspect 'else-case reached!', tokens + raise_inspect 'else-case reached!', encoder end - + end - - tokens - + + encoder + end - + end - + end end diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb index 289e795..67bb233 100644 --- a/lib/coderay/scanners/php.rb +++ b/lib/coderay/scanners/php.rb @@ -230,7 +230,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options if check(RE::PHP_START) || # starts with <? (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <? @@ -252,29 +252,24 @@ module Scanners until eos? - match = nil - kind = nil - case states.last when :initial # HTML - if scan RE::PHP_START - kind = :inline_delimiter + if match = scan(RE::PHP_START) + encoder.text_token match, :inline_delimiter label_expected = true states << :php else match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/) @html_scanner.tokenize match unless match.empty? - next end when :php if match = scan(/\s+/) - tokens << [match, :space] - next + encoder.text_token match, :space - elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo) - kind = :comment + elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo) + encoder.text_token match, :comment elsif match = scan(RE::IDENTIFIER) kind = Words::IDENT_KIND[match] @@ -299,77 +294,68 @@ module Scanners next end end + encoder.text_token match, kind - elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i) + elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i) label_expected = false - kind = :float + encoder.text_token match, :float - elsif scan(/0x[0-9a-fA-F]+/) + elsif match = scan(/0x[0-9a-fA-F]+/) label_expected = false - kind = :hex + encoder.text_token match, :hex - elsif scan(/\d+/) + elsif match = scan(/\d+/) label_expected = false - kind = :integer - - elsif scan(/'/) - tokens << [:open, :string] - if modifier - tokens << [modifier, :modifier] - modifier = nil - end - kind = :delimiter - states.push :sqstring + encoder.text_token match, :integer - elsif match = scan(/["`]/) - tokens << [:open, :string] + elsif match = scan(/['"`]/) + encoder.begin_group :string if modifier - tokens << [modifier, :modifier] + encoder.text_token modifier, :modifier modifier = nil end delimiter = match - kind = :delimiter - states.push :dqstring + encoder.text_token match, :delimiter + states.push match == "'" ? :sqstring : :dqstring elsif match = scan(RE::VARIABLE) label_expected = false - kind = Words::VARIABLE_KIND[match] + encoder.text_token match, Words::VARIABLE_KIND[match] - elsif scan(/\{/) - kind = :operator + elsif match = scan(/\{/) + encoder.text_token match, :operator label_expected = true states.push :php - elsif scan(/\}/) + elsif match = scan(/\}/) if states.size == 1 - kind = :error + encoder.text_token match, :error else states.pop if states.last.is_a?(::Array) delimiter = states.last[1] states[-1] = states.last[0] - tokens << [matched, :delimiter] - tokens << [:close, :inline] - next + encoder.text_token match, :delimiter + encoder.end_group :inline else - kind = :operator + encoder.text_token match, :operator label_expected = true end end - elsif scan(/@/) + elsif match = scan(/@/) label_expected = false - kind = :exception + encoder.text_token match, :exception - elsif scan RE::PHP_END - kind = :inline_delimiter + elsif match = scan(RE::PHP_END) + encoder.text_token match, :inline_delimiter states = [:initial] elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o) - tokens << [:open, :string] + encoder.begin_group :string warn 'heredoc in heredoc?' if heredoc_delimiter heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3]) - kind = :delimiter + encoder.text_token match, :delimiter states.push self[3] ? :sqstring : :dqstring heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/ @@ -379,152 +365,141 @@ module Scanners label_expected = true if match == ':' case_expected = false end - kind = :operator + encoder.text_token match, :operator else - getch - kind = :error + encoder.text_token getch, :error end when :sqstring - if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/) - kind = :content - elsif !heredoc_delimiter && scan(/'/) - tokens << [matched, :delimiter] - tokens << [:close, :string] + if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/) + encoder.text_token match, :content + elsif !heredoc_delimiter && match = scan(/'/) + encoder.text_token match, :delimiter + encoder.end_group :string delimiter = nil label_expected = false states.pop - next elsif heredoc_delimiter && match = scan(/\n/) - kind = :content if scan heredoc_delimiter - tokens << ["\n", :content] - tokens << [matched, :delimiter] - tokens << [:close, :string] + encoder.text_token "\n", :content + encoder.text_token matched, :delimiter + encoder.end_group :string heredoc_delimiter = nil label_expected = false states.pop - next + else + encoder.text_token match, :content end - elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/) - kind = :char - elsif scan(/\\./m) - kind = :content - elsif scan(/\\/) - kind = :error + elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/\\/) + encoder.text_token match, :error + else + states.pop end when :dqstring - if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/)) - kind = :content - elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/) - tokens << [matched, :delimiter] - tokens << [:close, :string] + if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/)) + encoder.text_token match, :content + elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/) + encoder.text_token match, :delimiter + encoder.end_group :string delimiter = nil label_expected = false states.pop - next elsif heredoc_delimiter && match = scan(/\n/) - kind = :content if scan heredoc_delimiter - tokens << ["\n", :content] - tokens << [matched, :delimiter] - tokens << [:close, :string] + encoder.text_token "\n", :content + encoder.text_token matched, :delimiter + encoder.end_group :string heredoc_delimiter = nil label_expected = false states.pop - next + else + encoder.text_token match, :content end - elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/) - kind = :char - elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/)) - kind = :char - elsif scan(/\\./m) - kind = :content - elsif scan(/\\/) - kind = :error + elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/) + encoder.text_token match, :char + elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/)) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/\\/) + encoder.text_token match, :error elsif match = scan(/#{RE::VARIABLE}/o) - kind = :local_variable if check(/\[#{RE::IDENTIFIER}\]/o) - tokens << [:open, :inline] - tokens << [match, :local_variable] - tokens << [scan(/\[/), :operator] - tokens << [scan(/#{RE::IDENTIFIER}/o), :ident] - tokens << [scan(/\]/), :operator] - tokens << [:close, :inline] - next + encoder.begin_group :inline + encoder.text_token match, :local_variable + encoder.text_token scan(/\[/), :operator + encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident + encoder.text_token scan(/\]/), :operator + encoder.end_group :inline elsif check(/\[/) match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o) - kind = :error + encoder.text_token match, :error elsif check(/->#{RE::IDENTIFIER}/o) - tokens << [:open, :inline] - tokens << [match, :local_variable] - tokens << [scan(/->/), :operator] - tokens << [scan(/#{RE::IDENTIFIER}/o), :ident] - tokens << [:close, :inline] - next + encoder.begin_group :inline + encoder.text_token match, :local_variable + encoder.text_token scan(/->/), :operator + encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident + encoder.end_group :inline elsif check(/->/) match << scan(/->/) - kind = :error + encoder.text_token match, :error + else + encoder.text_token match, :local_variable end elsif match = scan(/\{/) if check(/\$/) - kind = :delimiter + encoder.begin_group :inline states[-1] = [states.last, delimiter] delimiter = nil states.push :php - tokens << [:open, :inline] + encoder.text_token match, :delimiter else - kind = :string + encoder.text_token match, :string end - elsif scan(/\$\{#{RE::IDENTIFIER}\}/o) - kind = :local_variable - elsif scan(/\$/) - kind = :content + elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o) + encoder.text_token match, :local_variable + elsif match = scan(/\$/) + encoder.text_token match, :content + else + states.pop end when :class_expected - if scan(/\s+/) - kind = :space + if match = scan(/\s+/) + encoder.text_token match, :space elsif match = scan(/#{RE::IDENTIFIER}/o) - kind = :class + encoder.text_token match, :class states.pop else states.pop - next end when :function_expected - if scan(/\s+/) - kind = :space - elsif scan(/&/) - kind = :operator + if match = scan(/\s+/) + encoder.text_token match, :space + elsif match = scan(/&/) + encoder.text_token match, :operator elsif match = scan(/#{RE::IDENTIFIER}/o) - kind = :function + encoder.text_token match, :function states.pop else states.pop - next end else - raise_inspect 'Unknown state!', tokens, states - end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, states + raise_inspect 'Unknown state!', encoder, states end - raise_inspect 'Empty token', tokens, states unless match - - tokens << [match, kind] end - tokens + encoder end end diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index b8db721..e176403 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -17,8 +17,9 @@ module Scanners protected - def scan_tokens tokens, options - tokens << [string, :plain] + def scan_tokens encoder, options + encoder.text_token string, :plain + encoder end end diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index be5205e..568ed57 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -98,7 +98,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial string_delimiter = nil @@ -111,37 +111,34 @@ module Scanners until eos? - kind = nil - match = nil - if state == :string - if scan(STRING_DELIMITER_REGEXP[string_delimiter]) - tokens << [matched, :delimiter] - tokens << [:close, string_type] + if match = scan(STRING_DELIMITER_REGEXP[string_delimiter]) + encoder.text_token match, :delimiter + encoder.end_group string_type string_type = nil state = :initial next - elsif string_delimiter.size == 3 && scan(/\n/) - kind = :content - elsif scan(STRING_CONTENT_REGEXP[string_delimiter]) - kind = :content - elsif !string_raw && scan(/ \\ #{ESCAPE} /ox) - kind = :char - elsif scan(/ \\ #{UNICODE_ESCAPE} /ox) - kind = :char - elsif scan(/ \\ . /x) - kind = :content - elsif scan(/ \\ | $ /x) - tokens << [:close, string_type] + elsif string_delimiter.size == 3 && match = scan(/\n/) + encoder.text_token match, :content + elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter]) + encoder.text_token match, :content + elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox) + encoder.text_token match, :char + elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox) + encoder.text_token match, :char + elsif match = scan(/ \\ . /x) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group string_type string_type = nil - kind = :error + encoder.text_token match, :error state = :initial else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state end elsif match = scan(/ [ \t]+ | \\?\n /x) - tokens << [match, :space] + encoder.text_token match, :space if match == "\n" state = :initial if state == :include_expected docstring_coming = true if match?(/[ \t]*u?r?"""/) @@ -149,28 +146,28 @@ module Scanners next elsif match = scan(/ \# [^\n]* /mx) - tokens << [match, :comment] + encoder.text_token match, :comment next elsif state == :initial - if scan(/#{OPERATOR}/o) - kind = :operator + if match = scan(/#{OPERATOR}/o) + encoder.text_token match, :operator elsif match = scan(/(u?r?|b)?("""|"|'''|')/i) string_delimiter = self[2] string_type = docstring_coming ? :docstring : :string docstring_coming = false if docstring_coming - tokens << [:open, string_type] + encoder.begin_group string_type string_raw = false modifiers = self[1] unless modifiers.empty? string_raw = !!modifiers.index(?r) - tokens << [modifiers, :modifier] + encoder.text_token modifiers, :modifier match = string_delimiter end state = :string - kind = :delimiter + encoder.text_token match, :delimiter # TODO: backticks @@ -186,43 +183,45 @@ module Scanners state = DEF_NEW_STATE[match] from_import_state << match.to_sym if state == :include_expected end + encoder.text_token match, kind - elsif scan(/@[a-zA-Z0-9_.]+[lL]?/) - kind = :decorator + elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/) + encoder.text_token match, :decorator - elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/) - kind = :hex + elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/) + encoder.text_token match, :hex - elsif scan(/0[bB][01]+[lL]?/) - kind = :bin + elsif match = scan(/0[bB][01]+[lL]?/) + encoder.text_token match, :bin elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) - kind = :float if scan(/[jJ]/) match << matched - kind = :imaginary + encoder.text_token match, :imaginary + else + encoder.text_token match, :float end - elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/) - kind = :oct + elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/) + encoder.text_token match, :oct elsif match = scan(/\d+([lL])?/) - kind = :integer if self[1] == nil && scan(/[jJ]/) match << matched - kind = :imaginary + encoder.text_token match, :imaginary + else + encoder.text_token match, :integer end else - getch - kind = :error + encoder.text_token getch, :error end elsif state == :def_expected state = :initial if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o) - kind = :method + encoder.text_token match, :method else next end @@ -230,33 +229,34 @@ module Scanners elsif state == :class_expected state = :initial if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o) - kind = :class + encoder.text_token match, :class else next end elsif state == :include_expected if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o) - kind = :include if match == 'as' - kind = :keyword + encoder.text_token match, :keyword from_import_state << :as elsif from_import_state.first == :from && match == 'import' - kind = :keyword + encoder.text_token match, :keyword from_import_state << :import elsif from_import_state.last == :as - # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method - kind = :ident + # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method + encoder.text_token match, :ident from_import_state.pop elsif IDENT_KIND[match] == :keyword unscan match = nil state = :initial next + else + encoder.text_token match, :include end elsif match = scan(/,/) from_import_state.pop if from_import_state.last == :as - kind = :operator + encoder.text_token match, :operator else from_import_state = [] state = :initial @@ -264,28 +264,19 @@ module Scanners end else - raise_inspect 'Unknown state', tokens, state + raise_inspect 'Unknown state', encoder, state end - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens, state unless match - last_token_dot = match == '.' - tokens << [match, kind] - end if state == :string - tokens << [:close, string_type] + encoder.end_group string_type end - tokens + encoder end end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 01fda8e..064a92c 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,18 +1,18 @@ module CodeRay module Scanners - + load :html load :ruby - + # Scanner for HTML ERB templates. class RHTML < Scanner - + include Streamable register_for :rhtml title 'HTML ERB Template' KINDS_NOT_LOC = HTML::KINDS_NOT_LOC - + ERB_RUBY_BLOCK = / <%(?!%)[=-]? (?> @@ -24,51 +24,51 @@ module Scanners ) (?: -?%> )? /x # :nodoc: - + START_OF_ERB = / <%(?!%) /x # :nodoc: - + protected - + def setup @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true end - + def reset_instance super @html_scanner.reset end - - def scan_tokens tokens, options - + + def scan_tokens encoder, options + until eos? - + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - + @html_scanner.tokenize match, :tokens => encoder + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) start_tag = match[/\A<%[-=]?/] end_tag = match[/-?%?>?\z/] - tokens << [:open, :inline] - tokens << [start_tag, :inline_delimiter] + encoder.begin_group :inline + encoder.text_token start_tag, :inline_delimiter code = match[start_tag.size .. -1 - end_tag.size] @ruby_scanner.tokenize code - tokens << [end_tag, :inline_delimiter] unless end_tag.empty? - tokens << [:close, :inline] - + encoder.text_token end_tag, :inline_delimiter unless end_tag.empty? + encoder.end_group :inline + else - raise_inspect 'else-case reached!', tokens + raise_inspect 'else-case reached!', encoder end - + end - - tokens - + + encoder + end - + end - + end end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 0e8e802..dcbfce0 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -30,7 +30,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options patterns = Patterns # avoid constant lookup @@ -50,20 +50,18 @@ module Scanners unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8' until eos? - match = nil - kind = nil if state.instance_of? patterns::StringState match = scan_until(state.pattern) || scan_until(/\z/) - tokens << [match, :content] unless match.empty? + encoder.text_token match, :content unless match.empty? break if eos? if state.heredoc and self[1] # end of heredoc match = getch.to_s match << scan_until(/$/) unless eos? - tokens << [match, :delimiter] - tokens << [:close, state.type] + encoder.text_token match, :delimiter + encoder.end_group state.type state = state.next_state next end @@ -74,34 +72,34 @@ module Scanners if state.paren_depth state.paren_depth -= 1 if state.paren_depth > 0 - tokens << [match, :nesting_delimiter] + encoder.text_token match, :nesting_delimiter next end end - tokens << [match, :delimiter] + encoder.text_token match, :delimiter if state.type == :regexp and not eos? modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) - tokens << [modifiers, :modifier] unless modifiers.empty? + encoder.text_token modifiers, :modifier unless modifiers.empty? end - tokens << [:close, state.type] + encoder.end_group state.type value_expected = false state = state.next_state when '\\' if state.interpreted if esc = scan(/ #{patterns::ESCAPE} /ox) - tokens << [match + esc, :char] + encoder.text_token match + esc, :char else - tokens << [match, :error] + encoder.text_token match, :error end else case m = getch when state.delim, '\\' - tokens << [match + m, :char] + encoder.text_token match + m, :char when nil - tokens << [match, :error] + encoder.text_token match, :error else - tokens << [match + m, :content] + encoder.text_token match + m, :content end end @@ -113,42 +111,38 @@ module Scanners value_expected = true state = :initial inline_block_curly_depth = 1 - tokens << [:open, :inline] - tokens << [match + getch, :inline_delimiter] + encoder.begin_group :inline + encoder.text_token match + getch, :inline_delimiter when '$', '@' - tokens << [match, :escape] + encoder.text_token match, :escape last_state = state state = :initial else raise_inspect 'else-case # reached; #%p not handled' % - [peek(1)], tokens + [peek(1)], encoder end when state.opening_paren state.paren_depth += 1 - tokens << [match, :nesting_delimiter] + encoder.text_token match, :nesting_delimiter when /#{patterns::REGEXP_SYMBOLS}/ox - tokens << [match, :function] + encoder.text_token match, :function else raise_inspect 'else-case " reached; %p not handled, state = %p' % - [match, state], tokens + [match, state], encoder end - next else if match = scan(/[ \t\f]+/) - kind = :space match << scan(/\s*/) unless eos? || heredocs value_expected = true if match.index(?\n) - tokens << [match, kind] - next + encoder.text_token match, :space elsif match = scan(/\\?\n/) - kind = :space if match == "\n" value_expected = true state = :initial if state == :undef_comma_expected @@ -156,24 +150,20 @@ module Scanners if heredocs unscan # heredoc scanning needs \n at start state = heredocs.shift - tokens << [:open, state.type] + encoder.begin_group state.type heredocs = nil if heredocs.empty? next else match << scan(/\s*/) unless eos? end - tokens << [match, kind] - next + encoder.text_token match, :space elsif bol? && match = scan(/\#!.*/) - tokens << [match, :doctype] - next + encoder.text_token match, :doctype elsif match = scan(/\#.*/) or (bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o)) - kind = :comment - tokens << [match, kind] - next + encoder.text_token match, :comment elsif state == :initial @@ -192,16 +182,16 @@ module Scanners value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match] end value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o) + encoder.text_token match, kind elsif method_call_expected and match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo : /#{patterns::METHOD_AFTER_DOT}/o) - kind = - if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/) - :constant - else - :ident - end + if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/) + encoder.text_token match, :constant + else + encoder.text_token match, :ident + end method_call_expected = false value_expected = check(/#{patterns::VALUE_FOLLOWS}/o) @@ -209,7 +199,6 @@ module Scanners elsif not method_call_expected and match = scan(/ \.\.\.? | (\.|::) | [,\(\)\[\]\{\}] | ==?=? /x) value_expected = match !~ / [.\)\]\}] /x || match =~ /\A\.\./ method_call_expected = self[1] - kind = :operator if inline_block_stack case match when '{' @@ -220,35 +209,36 @@ module Scanners state, inline_block_curly_depth, heredocs = inline_block_stack.pop inline_block_stack = nil if inline_block_stack.empty? heredocs = nil if heredocs && heredocs.empty? - tokens << [match, :inline_delimiter] - kind = :inline - match = :close + encoder.text_token match, :inline_delimiter + encoder.end_group :inline + next end end end + encoder.text_token match, :operator elsif match = scan(/ ['"] /mx) - tokens << [:open, :string] - kind = :delimiter + encoder.begin_group :string + encoder.text_token match, :delimiter state = patterns::StringState.new :string, match == '"', match # important for streaming elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo : /#{patterns::INSTANCE_VARIABLE}/o) value_expected = false - kind = :instance_variable + encoder.text_token match, :instance_variable elsif value_expected and match = scan(/\//) - tokens << [:open, :regexp] - kind = :delimiter + encoder.begin_group :regexp + encoder.text_token match, :delimiter interpreted = true state = patterns::StringState.new :regexp, interpreted, match - elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o) + elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o) if method_call_expected - kind = :error + encoder.text_token match, :error method_call_expected = false else - kind = self[1] ? :float : :integer + encoder.text_token match, self[1] ? :float : :integer end value_expected = false @@ -256,28 +246,28 @@ module Scanners /#{patterns::SYMBOL}/o) case delim = match[1] when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] + encoder.begin_group :symbol + encoder.text_token ':', :symbol match = delim.chr - kind = :delimiter + encoder.text_token match, :delimiter state = patterns::StringState.new :symbol, delim == ?", match else - kind = :symbol + encoder.text_token match, :symbol value_expected = false end elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) value_expected = true - kind = :operator + encoder.text_token match, :operator elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o) indented = self[1] == '-' quote = self[3] delim = self[quote ? 4 : 2] kind = patterns::QUOTE_TO_TYPE[quote] - tokens << [:open, kind] - tokens << [match, :delimiter] - match = :close + encoder.begin_group kind + encoder.text_token match, :delimiter + encoder.end_group kind heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart ) heredocs ||= [] # create heredocs if empty @@ -286,38 +276,38 @@ module Scanners elsif value_expected and match = scan(/#{patterns::FANCY_START}/o) kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do - raise_inspect 'Unknown fancy string: %%%p' % k, tokens + raise_inspect 'Unknown fancy string: %%%p' % k, encoder end - tokens << [:open, kind] + encoder.begin_group kind state = patterns::StringState.new kind, interpreted, self[2] - kind = :delimiter + encoder.text_token match, :delimiter elsif value_expected and match = scan(/#{patterns::CHARACTER}/o) value_expected = false - kind = :integer + encoder.text_token match, :integer elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) value_expected = true - kind = :operator + encoder.text_token match, :operator elsif match = scan(/`/) if method_call_expected - kind = :operator + encoder.text_token match, :operator value_expected = true else - tokens << [:open, :shell] - kind = :delimiter + encoder.begin_group :shell + encoder.text_token match, :delimiter state = patterns::StringState.new :shell, true, match end elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo : /#{patterns::GLOBAL_VARIABLE}/o) - kind = :global_variable + encoder.text_token match, :global_variable value_expected = false elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo : /#{patterns::CLASS_VARIABLE}/o) - kind = :class_variable + encoder.text_token match, :class_variable value_expected = false else @@ -340,9 +330,9 @@ module Scanners end next if unicode end - kind = :error - match = getch - + + encoder.text_token getch, :error + end if last_state @@ -353,34 +343,30 @@ module Scanners elsif state == :def_expected if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) - kind = :method + encoder.text_token match, :method state = :initial else last_state = :dot_expected state = :initial - next end elsif state == :dot_expected if match = scan(/\.|::/) # invalid definition state = :def_expected - kind = :operator + encoder.text_token match, :operator else state = :initial - next end elsif state == :module_expected if match = scan(/<</) - kind = :operator + encoder.text_token match, :operator else state = :initial if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux : / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox) - kind = :class - else - next + encoder.text_token match, :class end end @@ -388,31 +374,29 @@ module Scanners state = :undef_comma_expected if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) - kind = :method + encoder.text_token match, :method elsif match = scan(/#{patterns::SYMBOL}/o) case delim = match[1] when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] + encoder.begin_group :symbol + encoder.text_token ':', :symbol match = delim.chr - kind = :delimiter + encoder.text_token match, :delimiter state = patterns::StringState.new :symbol, delim == ?", match state.next_state = :undef_comma_expected else - kind = :symbol + encoder.text_token match, :symbol end else state = :initial - next end elsif state == :undef_comma_expected if match = scan(/,/) - kind = :operator + encoder.text_token match, :operator state = :undef_expected else state = :initial - next end elsif state == :alias_expected @@ -420,38 +404,30 @@ module Scanners /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o) if match - tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)] - tokens << [self[2], :space] - tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)] + encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method) + encoder.text_token self[2], :space + encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method) end state = :initial - next end - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens, state unless match - - tokens << [match, kind] end end # cleaning up if state.is_a? patterns::StringState - tokens << [:close, state.type] + encoder.end_group state.type end if inline_block_stack until inline_block_stack.empty? state, *more = inline_block_stack.pop - tokens << [:close, :inline] if more - tokens << [:close, state.type] + encoder.end_group :inline if more + encoder.end_group state.type end end - tokens + encoder end end diff --git a/lib/coderay/scanners/scheme.rb b/lib/coderay/scanners/scheme.rb index cbd9729..c29641e 100644 --- a/lib/coderay/scanners/scheme.rb +++ b/lib/coderay/scanners/scheme.rb @@ -72,74 +72,63 @@ module CodeRay protected - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial ident_kind = IDENT_KIND until eos? - kind = match = nil case state when :initial - if scan(/ \s+ | \\\n /x) - kind = :space - elsif scan(/['\(\[\)\]]|#\(/) - kind = :operator # FIXME: was :operator_fat - elsif scan(/;.*/) - kind = :comment - elsif scan(/#\\(?:newline|space|.?)/) - kind = :char - elsif scan(/#[ft]/) - kind = :pre_constant - elsif scan(/#{IDENTIFIER}/o) - kind = ident_kind[matched] - elsif scan(/\./) - kind = :operator - elsif scan(/"/) - tokens << [:open, :string] + if match = scan(/ \s+ | \\\n /x) + encoder.text_token match, :space + elsif match = scan(/['\(\[\)\]]|#\(/) + encoder.text_token match, :operator # FIXME: was :operator_fat + elsif match = scan(/;.*/) + encoder.text_token match, :comment + elsif match = scan(/#\\(?:newline|space|.?)/) + encoder.text_token match, :char + elsif match = scan(/#[ft]/) + encoder.text_token match, :pre_constant + elsif match = scan(/#{IDENTIFIER}/o) + encoder.text_token match, ident_kind[matched] + elsif match = scan(/\./) + encoder.text_token match, :operator + elsif match = scan(/"/) + encoder.begin_group :string + encoder.text_token match, :delimiter state = :string - tokens << ['"', :delimiter] - next - elsif scan(/#{NUM}/o) and not matched.empty? - kind = :integer - elsif getch - kind = :error + elsif match = scan(/#{NUM}/o) and not matched.empty? + encoder.text_token match, :integer + else + encoder.text_token getch, :error end when :string - if scan(/[^"\\]+/) or scan(/\\.?/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] + if match = scan(/[^"\\]+|\\.?/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string state = :initial - next else raise_inspect "else case \" reached; %p not handled." % peek(1), - tokens, state + encoder, state end else - raise "else case reached" - end - - match ||= matched - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens + raise 'else case reached' + end - raise_inspect 'Empty token', tokens, state unless match - - tokens << [match, kind] end if state == :string - tokens << [:close, :string] + encoder.end_group state end - tokens + encoder end end diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index 3aeea77..d62a2c3 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -51,7 +51,7 @@ module CodeRay module Scanners STRING_PREFIXES = /[xnb]|_\w+/i - def scan_tokens tokens, options + def scan_tokens encoder, options state = :initial string_type = nil @@ -59,54 +59,50 @@ module CodeRay module Scanners until eos? - kind = nil - match = nil - if state == :initial - if scan(/ \s+ | \\\n /x) - kind = :space + if match = scan(/ \s+ | \\\n /x) + encoder.text_token match, :space - elsif scan(/^(?:--\s?|#).*/) - kind = :comment + elsif match = scan(/^(?:--\s?|#).*/) + encoder.text_token match, :comment - elsif scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx) - kind = self[1] ? :directive : :comment + elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx) + encoder.text_token match, self[1] ? :directive : :comment - elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x) - kind = :operator + elsif match = scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x) + encoder.text_token match, :operator - elsif scan(/(#{STRING_PREFIXES})?([`"'])/o) + elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o) prefix = self[1] string_type = self[2] - tokens << [:open, :string] - tokens << [prefix, :modifier] if prefix + encoder.begin_group :string + encoder.text_token prefix, :modifier if prefix match = string_type state = :string - kind = :delimiter + encoder.text_token match, :delimiter elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x) # FIXME: Don't match keywords after "." - kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase] + encoder.text_token match, match[0] == ?@ ? :variable : IDENT_KIND[match.downcase] - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex - elsif scan(/0[0-7]+(?![89.eEfF])/) - kind = :oct + elsif match = scan(/0[0-7]+(?![89.eEfF])/) + encoder.text_token match, :oct - elsif scan(/(?>\d+)(?![.eEfF])/) - kind = :integer + elsif match = scan(/(?>\d+)(?![.eEfF])/) + encoder.text_token match, :integer - elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) - kind = :float + elsif match = scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) + encoder.text_token match, :float - elsif scan(/\\N/) - kind = :pre_constant + elsif match = scan(/\\N/) + encoder.text_token match, :pre_constant else - getch - kind = :error + encoder.text_token getch, :error end @@ -121,54 +117,48 @@ module CodeRay module Scanners next end unless string_content.empty? - tokens << [string_content, :content] + encoder.text_token string_content, :content string_content = '' end - tokens << [matched, :delimiter] - tokens << [:close, :string] + encoder.text_token match, :delimiter + encoder.end_group :string state = :initial string_type = nil - next else string_content << match end - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) unless string_content.empty? - tokens << [string_content, :content] + encoder.text_token string_content, :content string_content = '' end - kind = :char + encoder.text_token match, :char elsif match = scan(/ \\ . /mox) string_content << match next - elsif scan(/ \\ | $ /x) + elsif match = scan(/ \\ | $ /x) unless string_content.empty? - tokens << [string_content, :content] + encoder.text_token string_content, :content string_content = '' end - kind = :error + encoder.text_token match, :error state = :initial else - raise "else case \" reached; %p not handled." % peek(1), tokens + raise "else case \" reached; %p not handled." % peek(1), encoder end else - raise 'else-case reached', tokens + raise 'else-case reached', encoder end - match ||= matched - unless kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end - tokens + + if state == :string + encoder.end_group state + end + + encoder end diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb index 62a6aba..3c3928f 100644 --- a/lib/coderay/scanners/yaml.rb +++ b/lib/coderay/scanners/yaml.rb @@ -13,7 +13,7 @@ module Scanners protected - def scan_tokens tokens, options + def scan_tokens encoder, options value_expected = nil state = :initial @@ -21,50 +21,48 @@ module Scanners until eos? - kind = nil - match = nil key_indent = nil if bol? if match = scan(/ +[\t ]*/) - kind = :space + encoder.text_token match, :space elsif match = scan(/\n+/) - kind = :space + encoder.text_token match, :space state = :initial if match.index(?\n) elsif match = scan(/#.*/) - kind = :comment + encoder.text_token match, :comment elsif bol? and case when match = scan(/---|\.\.\./) - tokens << [:open, :head] - tokens << [match, :head] - tokens << [:close, :head] + encoder.begin_group :head + encoder.text_token match, :head + encoder.end_group :head next when match = scan(/%.*/) - tokens << [match, :doctype] + encoder.text_token match, :doctype next end elsif state == :value and case - when !check(/(?:"[^"]*")(?=: |:$)/) && scan(/"/) - tokens << [:open, :string] - tokens << [matched, :delimiter] - tokens << [matched, :content] if scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx) - tokens << [matched, :delimiter] if scan(/"/) - tokens << [:close, :string] + when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/) + encoder.begin_group :string + encoder.text_token match, :delimiter + encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx) + encoder.text_token match, :delimiter if match = scan(/"/) + encoder.end_group :string next when match = scan(/[|>][-+]?/) - tokens << [:open, :string] - tokens << [match, :delimiter] + encoder.begin_group :string + encoder.text_token match, :delimiter string_indent = key_indent || column(pos - match.size - 1) - tokens << [matched, :content] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) - tokens << [:close, :string] + encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) + encoder.end_group :string next when match = scan(/(?![!"*&]).+?(?=$|\s+#)/) - tokens << [match, :string] + encoder.text_token match, :string string_indent = key_indent || column(pos - match.size - 1) - tokens << [matched, :string] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) + encoder.text_token matched, :string if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) next end @@ -72,68 +70,69 @@ module Scanners when match = scan(/[-:](?= |$)/) state = :value if state == :colon && (match == ':' || match == '-') state = :value if state == :initial && match == '-' - kind = :operator + encoder.text_token match, :operator + next when match = scan(/[,{}\[\]]/) - kind = :operator + encoder.text_token match, :operator + next when state == :initial && match = scan(/[\w.() ]*\S(?=: |:$)/) - kind = :key + encoder.text_token match, :key key_indent = column(pos - match.size - 1) - # tokens << [key_indent.inspect, :debug] + # encoder.text_token key_indent.inspect, :debug state = :colon + next when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?=: |:$)/) - tokens << [:open, :key] - tokens << [match[0,1], :delimiter] - tokens << [match[1..-2], :content] - tokens << [match[-1,1], :delimiter] - tokens << [:close, :key] + encoder.begin_group :key + encoder.text_token match[0,1], :delimiter + encoder.text_token match[1..-2], :content + encoder.text_token match[-1,1], :delimiter + encoder.end_group :key key_indent = column(pos - match.size - 1) - # tokens << [key_indent.inspect, :debug] + # encoder.text_token key_indent.inspect, :debug state = :colon next - when scan(/(![\w\/]+)(:([\w:]+))?/) - tokens << [self[1], :type] + when match = scan(/(![\w\/]+)(:([\w:]+))?/) + encoder.text_token self[1], :type if self[2] - tokens << [':', :operator] - tokens << [self[3], :class] + encoder.text_token ':', :operator + encoder.text_token self[3], :class end next - when scan(/&\S+/) - kind = :variable - when scan(/\*\w+/) - kind = :global_variable - when scan(/<</) - kind = :class_variable - when scan(/\d\d:\d\d:\d\d/) - kind = :oct - when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/) - kind = :oct - when scan(/:\w+/) - kind = :symbol - when scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/) - kind = :error - when scan(/[^:\s]+(:(?! |$)[^:\s]*)*/) - kind = :error + when match = scan(/&\S+/) + encoder.text_token match, :variable + next + when match = scan(/\*\w+/) + encoder.text_token match, :global_variable + next + when match = scan(/<</) + encoder.text_token match, :class_variable + next + when match = scan(/\d\d:\d\d:\d\d/) + encoder.text_token match, :oct + next + when match = scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/) + encoder.text_token match, :oct + next + when match = scan(/:\w+/) + encoder.text_token match, :symbol + next + when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/) + encoder.text_token match, :error + next + when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)*/) + encoder.text_token match, :error + next end else - getch - kind = :error + raise if eos? + encoder.text_token getch, :error end - match ||= matched - - if $CODERAY_DEBUG and not kind - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens, state - end - raise_inspect 'Empty token', tokens, state unless match - - tokens << [match, kind] - end - tokens + encoder end end diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb index 3e63372..9904d50 100755 --- a/lib/coderay/token_kinds.rb +++ b/lib/coderay/token_kinds.rb @@ -79,7 +79,6 @@ module CodeRay :plain => :NO_HIGHLIGHT, } AbbreviationForKind[:method] = AbbreviationForKind[:function] - AbbreviationForKind[:open] = AbbreviationForKind[:close] = AbbreviationForKind[:delimiter] AbbreviationForKind[:nesting_delimiter] = AbbreviationForKind[:delimiter] AbbreviationForKind[:escape] = AbbreviationForKind[:delimiter] AbbreviationForKind[:docstring] = AbbreviationForKind[:comment] diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index 2a0dc15..c85c2f1 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -1,6 +1,6 @@ module CodeRay - # = Tokens + # = Tokens TODO: Rewrite! # # The Tokens class represents a list of tokens returnd from # a Scanner. @@ -8,7 +8,7 @@ module CodeRay # A token is not a special object, just a two-element Array # consisting of # * the _token_ _text_ (the original source of the token in a String) or - # a _token_ _action_ (:open, :close, :begin_line, :end_line) + # a _token_ _action_ (begin_group, end_group, begin_line, end_line) # * the _token_ _kind_ (a Symbol representing the type of the token) # # A token looks like this: @@ -18,16 +18,16 @@ module CodeRay # ['$^', :error] # # Some scanners also yield sub-tokens, represented by special - # token actions, namely :open and :close. + # token actions, namely begin_group and end_group. # # The Ruby scanner, for example, splits "a string" into: # # [ - # [:open, :string], + # [:begin_group, :string], # ['"', :delimiter], # ['a string', :content], # ['"', :delimiter], - # [:close, :string] + # [:end_group, :string] # ] # # Tokens is the interface between Scanners and Encoders: @@ -47,20 +47,11 @@ module CodeRay # # It also allows you to generate tokens directly (without using a scanner), # to load them from a file, and still use any Encoder that CodeRay provides. - # - # Tokens' subclass TokenStream allows streaming to save memory. class Tokens < Array # The Scanner instance that created the tokens. attr_accessor :scanner - # Whether the object is a TokenStream. - # - # Returns false. - def stream? - false - end - # Iterates over all tokens. # # If a filter is given, only tokens of that kind are yielded. @@ -76,7 +67,7 @@ module CodeRay end # Iterates over all text tokens. - # Range tokens like [:open, :string] are left out. + # Token actions are left out. # # Example: # tokens.each_text_token { |text, kind| text.replace html_escape(text) } @@ -117,9 +108,13 @@ module CodeRay # For example, if you call +tokens.html+, the HTML encoder # is used to highlight the tokens. def method_missing meth, options = {} - Encoders[meth].new(options).encode_tokens self + encode_with meth, options end - + + def encode_with encoder, options = {} + Encoders[encoder].new(options).encode_tokens self + end + # Returns the tokens compressed by joining consecutive # tokens of the same kind. # @@ -158,7 +153,7 @@ module CodeRay replace optimize end - # Ensure that all :open tokens have a correspondent :close one. + # Ensure that all begin_group tokens have a correspondent end_group. # # TODO: Test this! def fix @@ -167,15 +162,15 @@ module CodeRay opened = [] for type, kind in self case type - when :open - opened.push [:close, kind] + when :begin_group + opened.push [:begin_group, kind] when :begin_line opened.push [:end_line, kind] - when :close, :end_line + when :end_group, :end_line expected = opened.pop if [type, kind] != expected - # Unexpected :close; decide what to do based on the kind: - # - token was never opened: delete the :close (just skip it) + # Unexpected end; decide what to do based on the kind: + # - token was never opened: delete the end (just skip it) next unless opened.rindex expected # - token was opened earlier: also close tokens in between tokens << token until (token = opened.pop) == expected @@ -230,6 +225,11 @@ module CodeRay dump = dump.gzip gzip_level dump.extend Undumping end + + # Return the actual number of tokens. + def count + size / 2 + end # The total size of the tokens. # Should be equal to the input size before @@ -242,9 +242,7 @@ module CodeRay size end - # The total size of the tokens. - # Should be equal to the input size before - # scanning. + # Return all text tokens joined into a single string. def text map { |t, k| t if t.is_a? ::String }.join end @@ -271,77 +269,12 @@ module CodeRay @dump = Marshal.load dump end - end - - - # = TokenStream - # - # The TokenStream class is a fake Array without elements. - # - # It redirects the method << to a block given at creation. - # - # This allows scanners and Encoders to use streaming (no - # tokens are saved, the input is highlighted the same time it - # is scanned) with the same code. - # - # See CodeRay.encode_stream and CodeRay.scan_stream - class TokenStream < Tokens - - # Whether the object is a TokenStream. - # - # Returns true. - def stream? - true - end - - # The Array is empty, but size counts the tokens given by <<. - attr_reader :size - - # Creates a new TokenStream that calls +block+ whenever - # its << method is called. - # - # Example: - # - # require 'coderay' - # - # token_stream = CodeRay::TokenStream.new do |text, kind| - # puts 'kind: %s, text size: %d.' % [kind, text.size] - # end - # - # token_stream << ['/\d+/', :regexp] - # #-> kind: rexpexp, text size: 5. - # - def initialize &block - raise ArgumentError, 'Block expected for streaming.' unless block - @callback = block - @size = 0 - end - - # Calls +block+ with +token+ and increments size. - # - # Returns self. - def << token - @callback.call(*token) - @size += 1 - self - end - - # This method is not implemented due to speed reasons. Use Tokens. - def text_size - raise NotImplementedError, - 'This method is not implemented due to speed reasons.' - end - - # A TokenStream cannot be dumped. Use Tokens. - def dump - raise NotImplementedError, 'A TokenStream cannot be dumped.' - end - - # A TokenStream cannot be optimized. Use Tokens. - def optimize - raise NotImplementedError, 'A TokenStream cannot be optimized.' - end - + alias text_token push + def begin_group kind; push :begin_group, kind end + def end_group kind; push :end_group, kind end + def begin_line kind; push :begin_line, kind end + def end_line kind; push :end_line, kind end + end end @@ -369,17 +302,18 @@ class TokensTest < Test::Unit::TestCase def test_adding_tokens tokens = CodeRay::Tokens.new assert_nothing_raised do - tokens << ['string', :type] - tokens << ['()', :operator] + tokens.text_token 'string', :type + tokens.text_token '()', :operator end - assert_equal tokens.size, 2 + assert_equal tokens.size, 4 + assert_equal tokens.count, 2 end def test_dump_undump tokens = CodeRay::Tokens.new assert_nothing_raised do - tokens << ['string', :type] - tokens << ['()', :operator] + tokens.text_token 'string', :type + tokens.text_token '()', :operator end tokens2 = nil assert_nothing_raised do |