summary | refs | log | tree | commit | diff
path: root/lib/coderay
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay')
-rw-r--r-- lib/coderay/encoder.rb 117
-rw-r--r-- lib/coderay/encoders/count.rb 44
-rw-r--r-- lib/coderay/encoders/debug.rb 36
-rw-r--r-- lib/coderay/encoders/filter.rb 26
-rw-r--r-- lib/coderay/encoders/html.rb 171
-rw-r--r-- lib/coderay/encoders/json.rb 18
-rw-r--r-- lib/coderay/encoders/lines_of_code.rb 6
-rw-r--r-- lib/coderay/encoders/statistic.rb 85
-rw-r--r-- lib/coderay/encoders/terminal.rb 95
-rw-r--r-- lib/coderay/encoders/text.rb 8
-rw-r--r-- lib/coderay/encoders/token_kind_filter.rb 30
-rw-r--r-- lib/coderay/encoders/xml.rb 12
-rw-r--r-- lib/coderay/for_redcloth.rb 2
-rw-r--r-- lib/coderay/scanner.rb 21
-rw-r--r-- lib/coderay/scanners/c.rb 99
-rw-r--r-- lib/coderay/scanners/cpp.rb 108
-rw-r--r-- lib/coderay/scanners/css.rb 136
-rw-r--r-- lib/coderay/scanners/debug.rb 89
-rw-r--r-- lib/coderay/scanners/delphi.rb 110
-rw-r--r-- lib/coderay/scanners/diff.rb 96
-rw-r--r-- lib/coderay/scanners/groovy.rb 189
-rw-r--r-- lib/coderay/scanners/html.rb 164
-rw-r--r-- lib/coderay/scanners/java.rb 95
-rw-r--r-- lib/coderay/scanners/java_script.rb 150
-rw-r--r-- lib/coderay/scanners/json.rb 77
-rw-r--r-- lib/coderay/scanners/nitro_xhtml.rb 78
-rw-r--r-- lib/coderay/scanners/php.rb 233
-rw-r--r-- lib/coderay/scanners/plaintext.rb 5
-rw-r--r-- lib/coderay/scanners/python.rb 115
-rw-r--r-- lib/coderay/scanners/rhtml.rb 52
-rw-r--r-- lib/coderay/scanners/ruby.rb 186
-rw-r--r-- lib/coderay/scanners/scheme.rb 75
-rw-r--r-- lib/coderay/scanners/sql.rb 94
-rw-r--r-- lib/coderay/scanners/yaml.rb 129
-rwxr-xr-x lib/coderay/token_kinds.rb 1
-rw-r--r-- lib/coderay/tokens.rb 138
36 files changed, 1460 insertions, 1630 deletions
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb
index 3ae2924..82545c4 100644
--- a/lib/coderay/encoder.rb
+++ b/lib/coderay/encoder.rb
@@ -31,11 +31,6 @@ module CodeRay
class << self
- # Returns if the Encoder can be used in streaming mode.
- def streamable?
- is_a? Streamable
- end
-
# If FILE_EXTENSION isn't defined, this method returns the
# downcase class name instead.
def const_missing sym
@@ -69,6 +64,7 @@ module CodeRay
@options = self.class::DEFAULT_OPTIONS.merge options
raise "I am only the basic Encoder class. I can't encode "\
"anything. :( Use my subclasses." if self.class == Encoder
+ $ALREADY_WARNED_OLD_INTERFACE = false
end
# Encode a Tokens object.
@@ -95,24 +91,25 @@ module CodeRay
# Encode the given +code+ using the Scanner for +lang+ in
# streaming mode.
def encode_stream code, lang, options = {}
- raise NotStreamableError, self unless kind_of? Streamable
options = @options.merge options
setup options
scanner_options = CodeRay.get_scanner_options options
+ scanner_options[:tokens] = self
@token_stream =
- CodeRay.scan_stream code, lang, scanner_options, &self
+ CodeRay.scan_stream code, lang, scanner_options
finish options
end
- # Behave like a proc. The token method is converted to a proc.
- def to_proc
- method(:token).to_proc
- end
-
# Return the default file extension for outputs of this encoder.
def file_extension
self.class::FILE_EXTENSION
end
+
+ def << token
+ warn 'Using old Tokens#<< interface.' unless $ALREADY_WARNED_OLD_INTERFACE
+ $ALREADY_WARNED_OLD_INTERFACE = true
+ self.token(*token)
+ end
protected
@@ -123,90 +120,80 @@ module CodeRay
def setup options
@out = ''
end
-
+
+ public
+
# Called with +content+ and +kind+ of the currently scanned token.
# For simple scanners, it's enougth to implement this method.
#
- # By default, it calls text_token or block_token, depending on
- # whether +content+ is a String.
+ # By default, it calls text_token, begin_group, end_group, begin_line,
+ # or end_line, depending on the +content+.
def token content, kind
- encoded_token =
- if content.is_a? ::String
- text_token content, kind
- elsif content.is_a? ::Symbol
- block_token content, kind
- else
- raise 'Unknown token content type: %p' % [content]
- end
- append_encoded_token_to_output encoded_token
- end
-
- def append_encoded_token_to_output encoded_token
- @out << encoded_token if encoded_token && defined?(@out) && @out
- end
-
- # Called for each text token ([text, kind]), where text is a String.
- def text_token text, kind
- end
-
- # Called for each block (non-text) token ([action, kind]),
- # where +action+ is a Symbol.
- #
- # Calls open_token, close_token, begin_line, and end_line according to
- # the value of +action+.
- def block_token action, kind
- case action
- when :open
- open_token kind
- when :close
- close_token kind
+ case content
+ when String
+ text_token content, kind
+ when :begin_group
+ begin_group kind
+ when :end_group
+ end_group kind
when :begin_line
begin_line kind
when :end_line
end_line kind
else
- raise 'unknown block action: %p' % action
+ raise 'Unknown token content type: %p' % [content]
end
end
- # Called for each block token at the start of the block ([:open, kind]).
- def open_token kind
+ # Called for each text token ([text, kind]), where text is a String.
+ def text_token text, kind
end
- # Called for each block token end of the block ([:close, kind]).
- def close_token kind
+ # Starts a token group with the given +kind+.
+ def begin_group kind
end
- # Called for each line token block at the start of the line ([:begin_line, kind]).
+ # Ends a token group with the given +kind+.
+ def end_group kind
+ end
+
+ # Starts a new line token group with the given +kind+.
def begin_line kind
end
- # Called for each line token block at the end of the line ([:end_line, kind]).
+ # Ends a new line token group with the given +kind+.
def end_line kind
end
-
+
+ protected
+
# Called with merged options after encoding starts.
# The return value is the result of encoding, typically @out.
def finish options
@out
end
-
+
# Do the encoding.
#
- # The already created +tokens+ object must be used; it can be a
- # TokenStream or a Tokens object.
- if RUBY_VERSION >= '1.9'
- def compile tokens, options
- for text, kind in tokens
- token text, kind
+ # The already created +tokens+ object must be used; it must be a
+ # Tokens object.
+ def compile tokens, options = {}
+ content = nil
+ for item in tokens
+ if item.is_a? Array
+ warn 'two-element array tokens are deprecated'
+ content, item = *item
+ end
+ if content
+ token content, item
+ content = nil
+ else
+ content = item
end
end
- else
- def compile tokens, options
- tokens.each(&self)
- end
+ raise if content
end
-
+
end
end
diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb
index 2e60a89..451a7f8 100644
--- a/lib/coderay/encoders/count.rb
+++ b/lib/coderay/encoders/count.rb
@@ -1,25 +1,55 @@
+($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
module Encoders
# Returns the number of tokens.
#
- # Text and block tokens (:open etc.) are counted.
+ # Text and block tokens are counted.
class Count < Encoder
-
+
include Streamable
register_for :count
-
+
protected
-
+
def setup options
@out = 0
end
-
- def token text, kind
+
+ def text_token text, kind
+ @out += 1
+ end
+
+ def begin_group kind
@out += 1
end
+ alias end_group begin_group
+ alias begin_line begin_group
+ alias end_line begin_group
end
-
+
end
end
+
+if $0 == __FILE__
+ $VERBOSE = true
+ $: << File.join(File.dirname(__FILE__), '..')
+ eval DATA.read, nil, $0, __LINE__ + 4
+end
+
+__END__
+require 'test/unit'
+
+class CountTest < Test::Unit::TestCase
+
+ def test_count
+ tokens = CodeRay.scan <<-RUBY.strip, :ruby
+#!/usr/bin/env ruby
+# a minimal Ruby program
+puts "Hello world!"
+ RUBY
+ assert_equal 9, tokens.encode_with(:count)
+ end
+
+end
\ No newline at end of file
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index 4c680d3..89e430f 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -19,31 +19,43 @@ module Encoders
register_for :debug
FILE_EXTENSION = 'raydebug'
+
+ def initialize options = {}
+ super
+ @opened = []
+ end
- protected
+ public
+
def text_token text, kind
if kind == :space
- text
+ @out << text
else
text = text.gsub(/[)\\]/, '\\\\\0') # escape ) and \
- "#{kind}(#{text})"
+ @out << kind.to_s << '(' << text << ')'
end
end
- def open_token kind
- "#{kind}<"
+ def begin_group kind
+ @opened << kind
+ @out << kind.to_s << '<'
end
- def close_token kind
- '>'
+ def end_group kind
+ if @opened.last != kind
+ puts @out
+ raise "we are inside #{@opened.inspect}, not #{kind}"
+ end
+ @opened.pop
+ @out << '>'
end
def begin_line kind
- "#{kind}["
+ @out << kind.to_s << '['
end
def end_line kind
- ']'
+ @out << ']'
end
end
@@ -74,16 +86,16 @@ class DebugEncoderTest < Test::Unit::TestCase
TEST_INPUT = CodeRay::Tokens[
['10', :integer],
['(\\)', :operator],
- [:open, :string],
+ [:begin_group, :string],
['test', :content],
- [:close, :string],
+ [:end_group, :string],
[:begin_line, :test],
["\n", :space],
["\n \t", :space],
[" \n", :space],
["[]", :method],
[:end_line, :test],
- ]
+ ].flatten
TEST_OUTPUT = <<-'DEBUG'.chomp
integer(10)operator((\\\))string<content(test)>test[
diff --git a/lib/coderay/encoders/filter.rb b/lib/coderay/encoders/filter.rb
index c1991cf..6b78ad3 100644
--- a/lib/coderay/encoders/filter.rb
+++ b/lib/coderay/encoders/filter.rb
@@ -16,15 +16,27 @@ module Encoders
end
def text_token text, kind
- [text, kind] if include_text_token? text, kind
+ @out.text_token text, kind if include_text_token? text, kind
end
def include_text_token? text, kind
true
end
- def block_token action, kind
- [action, kind] if include_block_token? action, kind
+ def begin_group kind
+ @out.begin_group kind if include_block_token? :begin_group, kind
+ end
+
+ def end_group kind
+ @out.end_group kind if include_block_token? :end_group, kind
+ end
+
+ def begin_line kind
+ @out.begin_line kind if include_block_token? :begin_line, kind
+ end
+
+ def end_line kind
+ @out.end_line kind if include_block_token? :end_line, kind
end
def include_block_token? action, kind
@@ -59,7 +71,7 @@ class FilterTest < Test::Unit::TestCase
def test_filtering_text_tokens
tokens = CodeRay::Tokens.new
10.times do |i|
- tokens << [i.to_s, :index]
+ tokens.text_token i.to_s, :index
end
assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens)
assert_equal tokens, tokens.filter
@@ -68,9 +80,9 @@ class FilterTest < Test::Unit::TestCase
def test_filtering_block_tokens
tokens = CodeRay::Tokens.new
10.times do |i|
- tokens << [:open, :index]
- tokens << [i.to_s, :content]
- tokens << [:close, :index]
+ tokens.begin_group :index
+ tokens.text_token i.to_s, :content
+ tokens.end_group :index
end
assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens)
assert_equal tokens, tokens.filter
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index dcdffa1..807fb42 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -83,7 +83,7 @@ module Encoders
#
# === :hint
# Include some information into the output using the title attribute.
- # Can be :info (show token type on mouse-over), :info_long (with full path)
+ # Can be :info (show token kind on mouse-over), :info_long (with full path)
# or :debug (via inspect).
#
# Default: false
@@ -153,12 +153,18 @@ module Encoders
#
# +hint+ may be :info, :info_long or :debug.
def self.token_path_to_hint hint, kinds
+ # FIXME: TRANSPARENT_TOKEN_KINDS?
+ # if TRANSPARENT_TOKEN_KINDS.include? kinds.first
+ # kinds = kinds[1..-1]
+ # else
+ # kinds = kinds[1..-1] + kinds.first
+ # end
title =
case hint
when :info
TOKEN_KIND_TO_INFO[kinds.first]
when :info_long
- kinds.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
+ kinds.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
when :debug
kinds.inspect
end
@@ -167,13 +173,13 @@ module Encoders
def setup options
super
-
+
@HTML_ESCAPE = HTML_ESCAPE.dup
@HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
-
+
@opened = [nil]
@css = CSS.new options[:style]
-
+
hint = options[:hint]
if hint and not [:debug, :info, :info_long].include? hint
raise ArgumentError, "Unknown value %p for :hint; \
@@ -184,45 +190,33 @@ module Encoders
when :class
@css_style = Hash.new do |h, k|
- c = CodeRay::Tokens::AbbreviationForKind[k.first]
- if c == :NO_HIGHLIGHT and not hint
- h[k.dup] = false
- else
- title = if hint
- HTML.token_path_to_hint(hint, k[1..-1] << k.first)
- else
- ''
- end
- if c == :NO_HIGHLIGHT
- h[k.dup] = '<span%s>' % [title]
- else
- h[k.dup] = '<span%s class="%s">' % [title, c]
+ c = Tokens::AbbreviationForKind[k.first]
+ h[k.dup] =
+ if c != :NO_HIGHLIGHT or hint
+ if hint
+ title = HTML.token_path_to_hint hint, k
+ end
+ if c == :NO_HIGHLIGHT
+ '<span%s>' % [title]
+ else
+ '<span%s class="%s">' % [title, c]
+ end
end
- end
end
when :style
@css_style = Hash.new do |h, k|
- if k.is_a? ::Array
- styles = k.dup
- else
- styles = [k]
- end
- type = styles.first
- classes = styles.map { |c| Tokens::AbbreviationForKind[c] }
- if classes.first == :NO_HIGHLIGHT and not hint
- h[k] = false
- else
- styles.shift if TRANSPARENT_TOKEN_KINDS.include? styles.first
- title = HTML.token_path_to_hint hint, styles
- style = @css[*classes]
- h[k] =
+ classes = k.map { |c| Tokens::AbbreviationForKind[c] }
+ h[k.dup] =
+ if classes.first != :NO_HIGHLIGHT or hint
+ if hint
+ title = HTML.token_path_to_hint hint, k
+ end
+ style = @css[*classes]
if style
'<span%s style="%s">' % [title, style]
- else
- false
end
- end
+ end
end
else
@@ -233,80 +227,81 @@ module Encoders
def finish options
not_needed = @opened.shift
- @out << '</span>' * @opened.size
unless @opened.empty?
warn '%d tokens still open: %p' % [@opened.size, @opened]
+ @out << '</span>' * @opened.size
end
-
+
@out.extend Output
@out.css = @css
@out.numerize! options[:line_numbers], options
@out.wrap! options[:wrap]
@out.apply_title! options[:title]
-
+
super
end
-
- def token text, type
- case text
-
- when nil
- # raise 'Token with nil as text was given: %p' % [[text, type]]
-
- when String
- if text =~ /#{HTML_ESCAPE_PATTERN}/o
- text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
- end
- @opened[0] = type
- if text != "\n" && style = @css_style[@opened]
- @out << style << text << '</span>'
+
+ public
+
+ def text_token text, kind
+ if text =~ /#{HTML_ESCAPE_PATTERN}/o
+ text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
+ end
+ @opened[0] = kind
+ @out <<
+ if style = @css_style[@opened]
+ style + text + '</span>'
else
- @out << text
- end
-
-
- # token groups, eg. strings
- when :open
- @opened[0] = type
- @out << (@css_style[@opened] || '<span>')
- @opened << type
- when :close
- if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type)
- warn 'Malformed token stream: Trying to close a token (%p) ' \
- 'that is not open. Open are: %p.' % [type, @opened[1..-1]]
+ text
end
+ end
+
+ # token groups, eg. strings
+ def begin_group kind
+ @opened[0] = kind
+ @opened << kind
+ @out << (@css_style[@opened] || '<span>')
+ end
+
+ def end_group kind
+ if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != kind)
+ warn 'Malformed token stream: Trying to close a token (%p) ' \
+ 'that is not open. Open are: %p.' % [kind, @opened[1..-1]]
+ end
+ @out <<
if @opened.empty?
- # nothing to close
+ '' # nothing to close
else
- @out << '</span>'
@opened.pop
+ '</span>'
end
-
- # whole lines to be highlighted, eg. a deleted line in a diff
- when :begin_line
- @opened[0] = type
- if style = @css_style[@opened]
- @out << style.sub('<span', '<div')
+ end
+
+ # whole lines to be highlighted, eg. a deleted line in a diff
+ def begin_line kind
+ @opened[0] = kind
+ style = @css_style[@opened]
+ @opened << kind
+ @out <<
+ if style
+ style.sub '<span', '<div'
else
- @out << '<div>'
- end
- @opened << type
- when :end_line
- if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type)
- warn 'Malformed token stream: Trying to close a line (%p) ' \
- 'that is not open. Open are: %p.' % [type, @opened[1..-1]]
+ '<div>'
end
+ end
+
+ def end_line kind
+ if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != kind)
+ warn 'Malformed token stream: Trying to close a line (%p) ' \
+ 'that is not open. Open are: %p.' % [kind, @opened[1..-1]]
+ end
+ @out <<
if @opened.empty?
- # nothing to close
+ '' # nothing to close
else
- @out << '</div>'
@opened.pop
+ '</div>'
end
-
- else
- raise 'unknown token kind: %p' % [text]
-
- end
end
end
diff --git a/lib/coderay/encoders/json.rb b/lib/coderay/encoders/json.rb
index 78f0ec0..bb09809 100644
--- a/lib/coderay/encoders/json.rb
+++ b/lib/coderay/encoders/json.rb
@@ -33,11 +33,23 @@ module Encoders
end
def text_token text, kind
- { :type => 'text', :text => text, :kind => kind }
+ @out << { :type => 'text', :text => text, :kind => kind }
end
- def block_token action, kind
- { :type => 'block', :action => action, :kind => kind }
+ def begin_group kind
+ @out << { :type => 'block', :action => 'open', :kind => kind }
+ end
+
+ def end_group kind
+ @out << { :type => 'block', :action => 'close', :kind => kind }
+ end
+
+ def begin_line kind
+ @out << { :type => 'block', :action => 'begin_line', :kind => kind }
+ end
+
+ def end_line kind
+ @out << { :type => 'block', :action => 'end_line', :kind => kind }
end
def finish options
diff --git a/lib/coderay/encoders/lines_of_code.rb b/lib/coderay/encoders/lines_of_code.rb
index c6ed4de..6b36aef 100644
--- a/lib/coderay/encoders/lines_of_code.rb
+++ b/lib/coderay/encoders/lines_of_code.rb
@@ -79,9 +79,9 @@ puts "Hello world!"
def test_filtering_block_tokens
tokens = CodeRay::Tokens.new
- tokens << ["Hello\n", :world]
- tokens << ["Hello\n", :space]
- tokens << ["Hello\n", :comment]
+ tokens.concat ["Hello\n", :world]
+ tokens.concat ["Hello\n", :space]
+ tokens.concat ["Hello\n", :comment]
assert_equal 2, CodeRay::Encoders::LinesOfCode.new.encode_tokens(tokens)
assert_equal 2, tokens.lines_of_code
assert_equal 2, tokens.loc
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 1b38938..d267b21 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -1,3 +1,4 @@
+($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
module Encoders
@@ -34,9 +35,25 @@ module Encoders
end
# TODO Hierarchy handling
- def block_token action, kind
+ def begin_group kind
+ block_token 'begin_group'
+ end
+
+ def end_group kind
+ block_token 'end_group'
+ end
+
+ def begin_line kind
+ block_token 'begin_line'
+ end
+
+ def end_line kind
+ block_token 'end_line'
+ end
+
+ def block_token action
@type_stats['TOTAL'].count += 1
- @type_stats['open/close'].count += 1
+ @type_stats[action].count += 1
end
STATS = <<-STATS # :nodoc:
@@ -77,3 +94,67 @@ Token Types (%d):
end
end
+
+if $0 == __FILE__
+ $VERBOSE = true
+ $: << File.join(File.dirname(__FILE__), '..')
+ eval DATA.read, nil, $0, __LINE__ + 4
+end
+
+__END__
+require 'test/unit'
+
+class StatisticEncoderTest < Test::Unit::TestCase
+
+ def test_creation
+ assert CodeRay::Encoders::Statistic < CodeRay::Encoders::Encoder
+ stats = nil
+ assert_nothing_raised do
+ stats = CodeRay.encoder :statistic
+ end
+ assert_kind_of CodeRay::Encoders::Encoder, stats
+ end
+
+ TEST_INPUT = CodeRay::Tokens[
+ ['10', :integer],
+ ['(\\)', :operator],
+ [:begin_group, :string],
+ ['test', :content],
+ [:end_group, :string],
+ [:begin_line, :test],
+ ["\n", :space],
+ ["\n \t", :space],
+ [" \n", :space],
+ ["[]", :method],
+ [:end_line, :test],
+ ].flatten
+ TEST_OUTPUT = <<-'DEBUG'
+
+Code Statistics
+
+Tokens 11
+ Non-Whitespace 4
+Bytes Total 20
+
+Token Types (5):
+ type count ratio size (average)
+-------------------------------------------------------------
+ TOTAL 11 100.00 % 1.8
+ space 3 27.27 % 3.0
+ begin_group 1 9.09 % 0.0
+ begin_line 1 9.09 % 0.0
+ content 1 9.09 % 4.0
+ end_group 1 9.09 % 0.0
+ end_line 1 9.09 % 0.0
+ integer 1 9.09 % 2.0
+ method 1 9.09 % 2.0
+ operator 1 9.09 % 3.0
+
+ DEBUG
+
+ def test_filtering_text_tokens
+ assert_equal TEST_OUTPUT, CodeRay::Encoders::Statistic.new.encode_tokens(TEST_INPUT)
+ assert_equal TEST_OUTPUT, TEST_INPUT.statistic
+ end
+
+end
\ No newline at end of file
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index 7224218..3a774a0 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -92,41 +92,72 @@ module CodeRay
TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
- TOKEN_COLORS[:open] = TOKEN_COLORS[:close] = TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+ TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
+ TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] =
+ TOKEN_COLORS[:delimiter]
protected
def setup(options)
super
@opened = []
+ @subcolors = nil
end
-
- def finish(options)
- super
- end
-
- def text_token text, type
- if color = (@subcolors || TOKEN_COLORS)[type]
+
+ public
+
+ def text_token text, kind
+ if color = (@subcolors || TOKEN_COLORS)[kind]
if Hash === color
if color[:self]
color = color[:self]
else
- return text
+ @out << text
+ return
end
end
-
- out = ansi_colorize(color)
- out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color))
- out << ansi_clear
- out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self]
- out
+
+ @out << ansi_colorize(color)
+ @out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color))
+ @out << ansi_clear
+ @out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self]
else
- text
+ @out << text
end
end
- def open_token type
- if color = TOKEN_COLORS[type]
+ def begin_group kind
+ @opened << kind
+ @out << open_token(kind)
+ end
+ alias begin_line begin_group
+
+ def end_group kind
+ if @opened.empty?
+ # nothing to close
+ else
+ @opened.pop
+ @out << ansi_clear
+ @out << open_token(@opened.last)
+ end
+ end
+
+ def end_line kind
+ if @opened.empty?
+ # nothing to close
+ else
+ @opened.pop
+ # whole lines to be highlighted,
+ # eg. added/modified/deleted lines in a diff
+ @out << "\t" * 100 + ansi_clear
+ @out << open_token(@opened.last)
+ end
+ end
+
+ private
+
+ def open_token kind
+ if color = TOKEN_COLORS[kind]
if Hash === color
@subcolors = color
ansi_colorize(color[:self]) if color[:self]
@@ -140,34 +171,6 @@ module CodeRay
end
end
- def block_token action, type
- case action
-
- when :open, :begin_line
- @opened << type
- open_token type
- when :close, :end_line
- if @opened.empty?
- # nothing to close
- else
- @opened.pop
- if action == :end_line
- # whole lines to be highlighted,
- # eg. added/modified/deleted lines in a diff
- "\t" * 100 + ansi_clear
- else
- ansi_clear
- end +
- open_token(@opened.last)
- end
-
- else
- raise 'unknown token kind: %p' % [text]
- end
- end
-
- private
-
def ansi_colorize(color)
Array(color).map { |c| "\e[#{c}m" }.join
end
diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb
index 26fef84..ecbf624 100644
--- a/lib/coderay/encoders/text.rb
+++ b/lib/coderay/encoders/text.rb
@@ -23,16 +23,16 @@ module Encoders
:separator => ''
}
+ def text_token text, kind
+ @out << text + @sep
+ end
+
protected
def setup options
super
@sep = options[:separator]
end
- def text_token text, kind
- text + @sep
- end
-
def finish options
super.chomp @sep
end
diff --git a/lib/coderay/encoders/token_kind_filter.rb b/lib/coderay/encoders/token_kind_filter.rb
index 4b2f582..fd3df44 100644
--- a/lib/coderay/encoders/token_kind_filter.rb
+++ b/lib/coderay/encoders/token_kind_filter.rb
@@ -76,28 +76,28 @@ class TokenKindFilterTest < Test::Unit::TestCase
def test_filtering_text_tokens
tokens = CodeRay::Tokens.new
for i in 1..10
- tokens << [i.to_s, :index]
- tokens << [' ', :space] if i < 10
+ tokens.text_token i.to_s, :index
+ tokens.text_token ' ', :space if i < 10
end
- assert_equal 10, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :space).size
- assert_equal 10, tokens.token_kind_filter(:exclude => :space).size
- assert_equal 9, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :space).size
- assert_equal 9, tokens.token_kind_filter(:include => :space).size
- assert_equal 0, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :all).size
- assert_equal 0, tokens.token_kind_filter(:exclude => :all).size
+ assert_equal 10, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :space).count
+ assert_equal 10, tokens.token_kind_filter(:exclude => :space).count
+ assert_equal 9, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :space).count
+ assert_equal 9, tokens.token_kind_filter(:include => :space).count
+ assert_equal 0, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :all).count
+ assert_equal 0, tokens.token_kind_filter(:exclude => :all).count
end
def test_filtering_block_tokens
tokens = CodeRay::Tokens.new
10.times do |i|
- tokens << [:open, :index]
- tokens << [i.to_s, :content]
- tokens << [:close, :index]
+ tokens.begin_group :index
+ tokens.text_token i.to_s, :content
+ tokens.end_group :index
end
- assert_equal 20, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).size
- assert_equal 20, tokens.token_kind_filter(:include => :blubb).size
- assert_equal 30, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).size
- assert_equal 30, tokens.token_kind_filter(:exclude => :index).size
+ assert_equal 20, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).count
+ assert_equal 20, tokens.token_kind_filter(:include => :blubb).count
+ assert_equal 30, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).count
+ assert_equal 30, tokens.token_kind_filter(:exclude => :index).count
end
end
diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb
index f32c967..0006d75 100644
--- a/lib/coderay/encoders/xml.rb
+++ b/lib/coderay/encoders/xml.rb
@@ -53,19 +53,19 @@ module Encoders
end
end
end
-
- def open_token kind
+
+ def begin_group kind
@node = @node.add_element kind.to_s
end
-
- def close_token kind
+
+ def end_group kind
if @node == @root
raise 'no token to close!'
end
@node = @node.parent
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/for_redcloth.rb b/lib/coderay/for_redcloth.rb
index 5149562..e439929 100644
--- a/lib/coderay/for_redcloth.rb
+++ b/lib/coderay/for_redcloth.rb
@@ -45,7 +45,7 @@ module CodeRay
if !opts[:lang] && RedCloth::VERSION.to_s >= '4.2.0'
# simulating pre-4.2 behavior
if opts[:text].sub!(/\A\[(\w+)\]/, '')
- if CodeRay::Scanners[$1].plugin_id == 'plaintext'
+ if CodeRay::Scanners[$1].plugin_id == :plaintext
opts[:text] = $& + opts[:text]
else
opts[:lang] = $1
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 165fd7f..286561d 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -61,11 +61,6 @@ module CodeRay
class << self
- # Returns if the Scanner can be used in streaming mode.
- def streamable?
- is_a? Streamable
- end
-
def normify code
code = code.to_s.dup
# try using UTF-8
@@ -115,9 +110,6 @@ module CodeRay
# overwrite default options here.)
# * +block+ is the callback for streamed highlighting.
#
- # If you set :stream to +true+ in the options, the Scanner uses a
- # TokenStream with the +block+ as callback to handle the tokens.
- #
# Else, a Tokens object is used.
def initialize code='', options = {}, &block
raise "I am only the basic Scanner class. I can't scan "\
@@ -129,16 +121,13 @@ module CodeRay
@tokens = options[:tokens]
if @options[:stream]
- warn "warning in CodeRay::Scanner.new: :stream is set, "\
- "but no block was given" unless block_given?
- raise NotStreamableError, self unless kind_of? Streamable
- @tokens ||= TokenStream.new(&block)
+ raise NotImplementedError unless @tokens.is_a? Encoders::Encoder
else
warn "warning in CodeRay::Scanner.new: Block given, "\
"but :stream is #{@options[:stream]}" if block_given?
@tokens ||= Tokens.new
end
- @tokens.scanner = self
+ @tokens.scanner = self if @tokens.respond_to? :scanner=
setup
end
@@ -162,7 +151,7 @@ module CodeRay
# Returns the Plugin ID for this scanner.
def lang
- self.class.plugin_id
+ self.class.plugin_id.to_s
end
# Scans the code and returns all tokens in a Tokens object.
@@ -191,8 +180,6 @@ module CodeRay
# Traverses the tokens.
def each &block
- raise ArgumentError,
- 'Cannot traverse TokenStream.' if @options[:stream]
tokens.each(&block)
end
include Enumerable
@@ -246,7 +233,7 @@ module CodeRay
# Resets the scanner.
def reset_instance
- @tokens.clear unless @options[:keep_tokens]
+ @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
@cached_tokens = nil
@bin_string = nil if defined? @bin_string
end
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb
index e13dc37..45ca42e 100644
--- a/lib/coderay/scanners/c.rb
+++ b/lib/coderay/scanners/c.rb
@@ -43,7 +43,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
label_expected = true
@@ -53,9 +53,6 @@ module Scanners
until eos?
- kind = nil
- match = nil
-
case state
when :initial
@@ -65,15 +62,14 @@ module Scanners
in_preproc_line = false
label_expected = label_expected_before_preproc_line
end
- tokens << [match, :space]
- next
+ encoder.text_token match, :space
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- kind = :comment
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ encoder.text_token match, :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
- kind = :comment
+ encoder.text_token match, :comment
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
label_expected = match =~ /[;\{\}]/
@@ -81,7 +77,7 @@ module Scanners
label_expected = true if match == ':'
case_expected = false
end
- kind = :operator
+ encoder.text_token match, :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
@@ -97,107 +93,96 @@ module Scanners
end
end
end
+ encoder.text_token match, kind
- elsif scan(/\$/)
- kind = :ident
+ elsif match = scan(/\$/)
+ encoder.text_token match, :ident
elsif match = scan(/L?"/)
- tokens << [:open, :string]
+ encoder.begin_group :string
if match[0] == ?L
- tokens << ['L', :modifier]
+ encoder.text_token 'L', :modifier
match = '"'
end
+ encoder.text_token match, :delimiter
state = :string
- kind = :delimiter
- elsif scan(/#[ \t]*(\w*)/)
- kind = :preprocessor
+ elsif match = scan(/#[ \t]*(\w*)/)
+ encoder.text_token match, :preprocessor
in_preproc_line = true
label_expected_before_preproc_line = label_expected
state = :include_expected if self[1] == 'include'
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+ elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
label_expected = false
- kind = :char
+ encoder.text_token match, :char
- elsif scan(/0[xX][0-9A-Fa-f]+/)
+ elsif match = scan(/0[xX][0-9A-Fa-f]+/)
label_expected = false
- kind = :hex
+ encoder.text_token match, :hex
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+ elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
label_expected = false
- kind = :oct
+ encoder.text_token match, :oct
- elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
+ elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
label_expected = false
- kind = :integer
+ encoder.text_token match, :integer
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
label_expected = false
- kind = :float
+ encoder.text_token match, :float
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
when :string
- if scan(/[^\\\n"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
+ if match = scan(/[^\\\n"]+/)
+ encoder.text_token match, :content
+ elsif match = scan(/"/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :initial
label_expected = false
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/ \\ | $ /x)
- tokens << [:close, :string]
- kind = :error
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group :string
+ encoder.text_token match, :error
state = :initial
label_expected = false
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
+ if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+ encoder.text_token match, :include
state = :initial
elsif match = scan(/\s+/)
- kind = :space
+ encoder.text_token match, :space
state = :initial if match.index ?\n
else
state = :initial
- next
end
else
- raise_inspect 'Unknown state', tokens
+ raise_inspect 'Unknown state', encoder
end
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
end
if state == :string
- tokens << [:close, :string]
+ encoder.end_group :string
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/cpp.rb b/lib/coderay/scanners/cpp.rb
index eba1bd2..7531892 100644
--- a/lib/coderay/scanners/cpp.rb
+++ b/lib/coderay/scanners/cpp.rb
@@ -53,7 +53,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
label_expected = true
@@ -63,9 +63,6 @@ module Scanners
until eos?
- kind = nil
- match = nil
-
case state
when :initial
@@ -75,15 +72,14 @@ module Scanners
in_preproc_line = false
label_expected = label_expected_before_preproc_line
end
- tokens << [match, :space]
- next
+ encoder.text_token match, :space
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- kind = :comment
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ encoder.text_token match, :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
- kind = :comment
+ encoder.text_token match, :comment
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
label_expected = match =~ /[;\{\}]/
@@ -91,7 +87,7 @@ module Scanners
label_expected = true if match == ':'
case_expected = false
end
- kind = :operator
+ encoder.text_token match, :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
@@ -109,122 +105,110 @@ module Scanners
end
end
end
+ encoder.text_token match, kind
- elsif scan(/\$/)
- kind = :ident
+ elsif match = scan(/\$/)
+ encoder.text_token match, :ident
elsif match = scan(/L?"/)
- tokens << [:open, :string]
+ encoder.begin_group :string
if match[0] == ?L
- tokens << ['L', :modifier]
+ encoder.text_token 'L', :modifier
match = '"'
end
state = :string
- kind = :delimiter
+ encoder.text_token match, :delimiter
- elsif scan(/#[ \t]*(\w*)/)
- kind = :preprocessor
+ elsif match = scan(/#[ \t]*(\w*)/)
+ encoder.text_token match, :preprocessor
in_preproc_line = true
label_expected_before_preproc_line = label_expected
state = :include_expected if self[1] == 'include'
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+ elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
label_expected = false
- kind = :char
+ encoder.text_token match, :char
- elsif scan(/0[xX][0-9A-Fa-f]+/)
+ elsif match = scan(/0[xX][0-9A-Fa-f]+/)
label_expected = false
- kind = :hex
+ encoder.text_token match, :hex
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+ elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
label_expected = false
- kind = :oct
+ encoder.text_token match, :oct
- elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
+ elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
label_expected = false
- kind = :integer
+ encoder.text_token match, :integer
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
label_expected = false
- kind = :float
+ encoder.text_token match, :float
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
when :string
- if scan(/[^\\"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
+ if match = scan(/[^\\"]+/)
+ encoder.text_token match, :content
+ elsif match = scan(/"/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :initial
label_expected = false
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/ \\ | $ /x)
- tokens << [:close, :string]
- kind = :error
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group :string
+ encoder.text_token match, :error
state = :initial
label_expected = false
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
+ if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+ encoder.text_token match, :include
state = :initial
elsif match = scan(/\s+/)
- kind = :space
+ encoder.text_token match, :space
state = :initial if match.index ?\n
else
state = :initial
- next
end
when :class_name_expected
- if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = :class
+ if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+ encoder.text_token match, :class
state = :initial
elsif match = scan(/\s+/)
- kind = :space
+ encoder.text_token match, :space
else
- getch
- kind = :error
+ encoder.text_token getch, :error
state = :initial
end
else
- raise_inspect 'Unknown state', tokens
-
- end
+ raise_inspect 'Unknown state', encoder
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
end
if state == :string
- tokens << [:close, :string]
+ encoder.end_group :string
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb
index 75cd056..b3f116e 100644
--- a/lib/coderay/scanners/css.rb
+++ b/lib/coderay/scanners/css.rb
@@ -51,129 +51,123 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
value_expected = nil
states = [:initial]
until eos?
- kind = nil
- match = nil
-
- if scan(/\s+/)
- kind = :space
+ if match = scan(/\s+/)
+ encoder.text_token match, :space
elsif case states.last
when :initial, :media
- if scan(/(?>#{RE::Ident})(?!\()|\*/ox)
- kind = :type
- elsif scan RE::Class
- kind = :class
- elsif scan RE::Id
- kind = :constant
- elsif scan RE::PseudoClass
- kind = :pseudo_class
+ if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
+ encoder.text_token match, :type
+ elsif match = scan(RE::Class)
+ encoder.text_token match, :class
+ elsif match = scan(RE::Id)
+ encoder.text_token match, :constant
+ elsif match = scan(RE::PseudoClass)
+ encoder.text_token match, :pseudo_class
elsif match = scan(RE::AttributeSelector)
# TODO: Improve highlighting inside of attribute selectors.
- tokens << [match[0,1], :operator]
- tokens << [match[1..-2], :attribute_name] if match.size > 2
- tokens << [match[-1,1], :operator] if match[-1] == ?]
- next
+ encoder.text_token match[0,1], :operator
+ encoder.text_token match[1..-2], :attribute_name if match.size > 2
+ encoder.text_token match[-1,1], :operator if match[-1] == ?]
elsif match = scan(/@media/)
- kind = :directive
+ encoder.text_token match, :directive
states.push :media_before_name
end
when :block
- if scan(/(?>#{RE::Ident})(?!\()/ox)
+ if match = scan(/(?>#{RE::Ident})(?!\()/ox)
if value_expected
- kind = :value
+ encoder.text_token match, :value
else
- kind = :key
+ encoder.text_token match, :key
end
end
when :media_before_name
- if scan RE::Ident
- kind = :type
+ if match = scan(RE::Ident)
+ encoder.text_token match, :type
states[-1] = :media_after_name
end
when :media_after_name
- if scan(/\{/)
- kind = :operator
+ if match = scan(/\{/)
+ encoder.text_token match, :operator
states[-1] = :media
end
when :comment
- if scan(/(?:[^*\s]|\*(?!\/))+/)
- kind = :comment
- elsif scan(/\*\//)
- kind = :comment
+ if match = scan(/(?:[^*\s]|\*(?!\/))+/)
+ encoder.text_token match, :comment
+ elsif match = scan(/\*\//)
+ encoder.text_token match, :comment
states.pop
- elsif scan(/\s+/)
- kind = :space
+ elsif match = scan(/\s+/)
+ encoder.text_token match, :space
end
else
- raise_inspect 'Unknown state', tokens
+ raise_inspect 'Unknown state', encoder
end
- elsif scan(/\/\*/)
- kind = :comment
+ elsif match = scan(/\/\*/)
+ encoder.text_token match, :comment
states.push :comment
- elsif scan(/\{/)
+ elsif match = scan(/\{/)
value_expected = false
- kind = :operator
+ encoder.text_token match, :operator
states.push :block
- elsif scan(/\}/)
+ elsif match = scan(/\}/)
value_expected = false
if states.last == :block || states.last == :media
- kind = :operator
+ encoder.text_token match, :operator
states.pop
else
- kind = :error
+ encoder.text_token match, :error
end
elsif match = scan(/#{RE::String}/o)
- tokens << [:open, :string]
- tokens << [match[0, 1], :delimiter]
- tokens << [match[1..-2], :content] if match.size > 2
- tokens << [match[-1, 1], :delimiter] if match.size >= 2
- tokens << [:close, :string]
- next
+ encoder.begin_group :string
+ encoder.text_token match[0, 1], :delimiter
+ encoder.text_token match[1..-2], :content if match.size > 2
+ encoder.text_token match[-1, 1], :delimiter if match.size >= 2
+ encoder.end_group :string
elsif match = scan(/#{RE::Function}/o)
- tokens << [:open, :string]
+ encoder.begin_group :string
start = match[/^\w+\(/]
- tokens << [start, :delimiter]
+ encoder.text_token start, :delimiter
if match[-1] == ?)
- tokens << [match[start.size..-2], :content]
- tokens << [')', :delimiter]
+ encoder.text_token match[start.size..-2], :content
+ encoder.text_token ')', :delimiter
else
- tokens << [match[start.size..-1], :content]
+ encoder.text_token match[start.size..-1], :content
end
- tokens << [:close, :string]
- next
+ encoder.end_group :string
- elsif scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
- kind = :float
+ elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
+ encoder.text_token match, :float
- elsif scan(/#{RE::Color}/o)
- kind = :color
+ elsif match = scan(/#{RE::Color}/o)
+ encoder.text_token match, :color
- elsif scan(/! *important/)
- kind = :important
+ elsif match = scan(/! *important/)
+ encoder.text_token match, :important
- elsif scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
- kind = :color
+ elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
+ encoder.text_token match, :color
- elsif scan(/#{RE::AtKeyword}/o)
- kind = :directive
+ elsif match = scan(RE::AtKeyword)
+ encoder.text_token match, :directive
elsif match = scan(/ [+>:;,.=()\/] /x)
if match == ':'
@@ -181,26 +175,16 @@ module Scanners
elsif match == ';'
value_expected = false
end
- kind = :operator
+ encoder.text_token match, :operator
else
- getch
- kind = :error
-
- end
+ encoder.text_token getch, :error
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb
index e33bff2..0f2b89f 100644
--- a/lib/coderay/scanners/debug.rb
+++ b/lib/coderay/scanners/debug.rb
@@ -14,67 +14,52 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
opened_tokens = []
until eos?
- kind = nil
- match = nil
-
- if scan(/\s+/)
- tokens << [matched, :space]
- next
-
- elsif scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
- kind = self[1].to_sym
- match = self[2].gsub(/\\(.)/, '\1')
- unless Tokens::AbbreviationForKind.has_key? kind
- kind = :error
- match = matched
- end
-
- elsif scan(/ (\w+) ([<\[]) /x)
- kind = self[1].to_sym
- opened_tokens << kind
- case self[2]
- when '<'
- match = :open
- when '['
- match = :begin_line
- else
- raise
- end
-
- elsif !opened_tokens.empty? && scan(/ > /x)
- kind = opened_tokens.pop
- match = :close
-
- elsif !opened_tokens.empty? && scan(/ \] /x)
- kind = opened_tokens.pop
- match = :end_line
-
+ if match = scan(/\s+/)
+ encoder.text_token match, :space
+
+ elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
+ kind = self[1].to_sym
+ match = self[2].gsub(/\\(.)/, '\1')
+ unless Tokens::AbbreviationForKind.has_key? kind
+ kind = :error
+ match = matched
+ end
+ encoder.text_token match, kind
+
+ elsif match = scan(/ (\w+) ([<\[]) /x)
+ kind = self[1].to_sym
+ opened_tokens << kind
+ case self[2]
+ when '<'
+ encoder.begin_group kind
+ when '['
+ encoder.begin_line kind
else
- kind = :space
- getch
-
+ raise 'CodeRay bug: This case should not be reached.'
end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
+
+ elsif !opened_tokens.empty? && match = scan(/ > /x)
+ encoder.end_group opened_tokens.pop
+
+ elsif !opened_tokens.empty? && match = scan(/ \] /x)
+ encoder.end_line opened_tokens.pop
+
+ else
+ encoder.text_token getch, :space
+
end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
end
- tokens << [:close, opened_tokens.pop] until opened_tokens.empty?
+ encoder.end_group opened_tokens.pop until opened_tokens.empty?
- tokens
+ encoder
end
end
@@ -111,14 +96,14 @@ method([])]
TEST_OUTPUT = CodeRay::Tokens[
['10', :integer],
['(\\)', :operator],
- [:open, :string],
+ [:begin_group, :string],
['test', :content],
- [:close, :string],
+ [:end_group, :string],
[:begin_line, :test],
["\n\n \t \n", :space],
["[]", :method],
[:end_line, :test],
- ]
+ ].flatten
def test_filtering_text_tokens
assert_equal TEST_OUTPUT, CodeRay::Scanners::Debug.new.tokenize(TEST_INPUT)
diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb
index 170f250..e0f4ea1 100644
--- a/lib/coderay/scanners/delphi.rb
+++ b/lib/coderay/scanners/delphi.rb
@@ -42,110 +42,100 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
last_token = ''
-
+
until eos?
-
- kind = nil
- match = nil
-
+
if state == :initial
- if scan(/ \s+ /x)
- tokens << [matched, :space]
+ if match = scan(/ \s+ /x)
+ encoder.text_token match, :space
next
- elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
- tokens << [matched, :preprocessor]
+ elsif match = scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+ encoder.text_token match, :preprocessor
next
- elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
- tokens << [matched, :comment]
+ elsif match = scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+ encoder.text_token match, :comment
next
elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x)
- kind = :operator
+ encoder.text_token match, :operator
elsif match = scan(/\./)
- kind = :operator
- if last_token == 'end'
- tokens << [match, kind]
- next
- end
+ encoder.text_token match, :operator
+ next if last_token == 'end'
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]
+ encoder.text_token match, NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]
- elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
- tokens << [:open, :char]
- tokens << ["'", :delimiter]
- tokens << [self[1], :content]
- tokens << ["'", :delimiter]
- tokens << [:close, :char]
+ elsif match = skip(/ ' ( [^\n']|'' ) (?:'|$) /x)
+ encoder.begin_group :char
+ encoder.text_token "'", :delimiter
+ encoder.text_token self[1], :content
+ encoder.text_token "'", :delimiter
+ encoder.end_group :char
next
elsif match = scan(/ ' /x)
- tokens << [:open, :string]
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
state = :string
- kind = :delimiter
- elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
- kind = :char
+ elsif match = scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+ encoder.text_token match, :char
- elsif scan(/ \$ [0-9A-Fa-f]+ /x)
- kind = :hex
+ elsif match = scan(/ \$ [0-9A-Fa-f]+ /x)
+ encoder.text_token match, :hex
- elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
- kind = :integer
+ elsif match = scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+ encoder.text_token match, :integer
+
+ elsif match = scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+ encoder.text_token match, :float
- elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
- kind = :float
-
else
- kind = :error
- getch
-
+ encoder.text_token getch, :error
+ next
+
end
elsif state == :string
- if scan(/[^\n']+/)
- kind = :content
- elsif scan(/''/)
- kind = :char
- elsif scan(/'/)
- tokens << ["'", :delimiter]
- tokens << [:close, :string]
+ if match = scan(/[^\n']+/)
+ encoder.text_token match, :content
+ elsif match = scan(/''/)
+ encoder.text_token match, :char
+ elsif match = scan(/'/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :initial
next
- elsif scan(/\n/)
- tokens << [:close, :string]
- kind = :error
+ elsif match = scan(/\n/)
+ encoder.end_group :string
+ encoder.text_token match, :space
state = :initial
else
- raise "else case \' reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \' reached; %p not handled." % peek(1), encoder
end
else
- raise 'else-case reached', tokens
+ raise_inspect 'else-case reached', encoder
end
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens unless match
-
last_token = match
- tokens << [match, kind]
end
- tokens
+ if state == :string
+ encoder.end_group state
+ end
+
+ encoder
end
end
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index 4f3ff2e..417985a 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -13,7 +13,7 @@ module Scanners
require 'coderay/helpers/file_type'
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
line_kind = nil
state = :initial
@@ -21,14 +21,13 @@ module Scanners
content_lang = nil
until eos?
- kind = match = nil
if match = scan(/\n/)
if line_kind
- tokens << [:end_line, line_kind]
+ encoder.end_line line_kind
line_kind = nil
end
- tokens << [match, :space]
+ encoder.text_token match, :space
next
end
@@ -36,89 +35,82 @@ module Scanners
when :initial
if match = scan(/--- |\+\+\+ |=+|_+/)
- tokens << [:begin_line, line_kind = :head]
- tokens << [match, :head]
- if filename = scan(/.*?(?=$|[\t\n\x00]| \(revision)/)
- tokens << [filename, :filename]
- content_lang = FileType.fetch filename, :plaintext
+ encoder.begin_line line_kind = :head
+ encoder.text_token match, :head
+ if match = scan(/.*?(?=$|[\t\n\x00]| \(revision)/)
+ encoder.text_token match, :filename
+ content_lang = FileType.fetch match, :plaintext
end
next unless match = scan(/.+/)
- kind = :plain
+ encoder.text_token match, :plain
elsif match = scan(/Index: |Property changes on: /)
- tokens << [:begin_line, line_kind = :head]
- tokens << [match, :head]
+ encoder.begin_line line_kind = :head
+ encoder.text_token match, :head
next unless match = scan(/.+/)
- kind = :plain
+ encoder.text_token match, :plain
elsif match = scan(/Added: /)
- tokens << [:begin_line, line_kind = :head]
- tokens << [match, :head]
+ encoder.begin_line line_kind = :head
+ encoder.text_token match, :head
next unless match = scan(/.+/)
- kind = :plain
+ encoder.text_token match, :plain
state = :added
elsif match = scan(/\\ /)
- tokens << [:begin_line, line_kind = :change]
- tokens << [match, :change]
+ encoder.begin_line line_kind = :change
+ encoder.text_token match, :change
next unless match = scan(/.+/)
- kind = :plain
+ encoder.text_token match, :plain
elsif match = scan(/@@(?>[^@\n]*)@@/)
if check(/\n|$/)
- tokens << [:begin_line, line_kind = :change]
+ encoder.begin_line line_kind = :change
else
- tokens << [:open, :change]
+ encoder.begin_group :change
end
- tokens << [match[0,2], :change]
- tokens << [match[2...-2], :plain] if match.size > 4
- tokens << [match[-2,2], :change]
- tokens << [:close, :change] unless line_kind
- next unless code = scan(/.+/)
- CodeRay.scan code, content_lang, :tokens => tokens
+ encoder.text_token match[0,2], :change
+ encoder.text_token match[2...-2], :plain if match.size > 4
+ encoder.text_token match[-2,2], :change
+ encoder.end_group :change unless line_kind
+ next unless match = scan(/.+/)
+ CodeRay.scan match, content_lang, :tokens => encoder
next
elsif match = scan(/\+/)
- tokens << [:begin_line, line_kind = :insert]
- tokens << [match, :insert]
+ encoder.begin_line line_kind = :insert
+ encoder.text_token match, :insert
next unless match = scan(/.+/)
- CodeRay.scan match, content_lang, :tokens => tokens
+ CodeRay.scan match, content_lang, :tokens => encoder
next
elsif match = scan(/-/)
- tokens << [:begin_line, line_kind = :delete]
- tokens << [match, :delete]
- next unless code = scan(/.+/)
- CodeRay.scan code, content_lang, :tokens => tokens
+ encoder.begin_line line_kind = :delete
+ encoder.text_token match, :delete
+ next unless match = scan(/.+/)
+ CodeRay.scan match, content_lang, :tokens => encoder
next
- elsif code = scan(/ .*/)
- CodeRay.scan code, content_lang, :tokens => tokens
+ elsif match = scan(/ .*/)
+ CodeRay.scan match, content_lang, :tokens => encoder
next
- elsif scan(/.+/)
- tokens << [:begin_line, line_kind = :comment]
- kind = :plain
+ elsif match = scan(/.+/)
+ encoder.begin_line line_kind = :comment
+ encoder.text_token match, :plain
else
raise_inspect 'else case rached'
end
when :added
if match = scan(/ \+/)
- tokens << [:begin_line, line_kind = :insert]
- tokens << [match, :insert]
+ encoder.begin_line line_kind = :insert
+ encoder.text_token match, :insert
next unless match = scan(/.+/)
- kind = :plain
+ encoder.text_token match, :plain
else
state = :initial
next
end
end
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
end
- tokens << [:end_line, line_kind] if line_kind
- tokens
+ encoder.end_line line_kind if line_kind
+
+ encoder
end
end
diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb
index fd7fbd9..fdbbbc7 100644
--- a/lib/coderay/scanners/groovy.rb
+++ b/lib/coderay/scanners/groovy.rb
@@ -1,11 +1,11 @@
module CodeRay
module Scanners
-
+
load :java
# Scanner for Groovy.
class Groovy < Java
-
+
include Streamable
register_for :groovy
@@ -37,7 +37,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
inline_block_stack = []
@@ -45,35 +45,32 @@ module Scanners
string_delimiter = nil
import_clause = class_name_follows = last_token = after_def = false
value_expected = true
-
+
until eos?
-
- kind = nil
- match = nil
case state
-
+
when :initial
-
+
if match = scan(/ \s+ | \\\n /x)
- tokens << [match, :space]
+ encoder.text_token match, :space
if match.index ?\n
import_clause = after_def = false
value_expected = true unless value_expected
end
next
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
after_def = false
- kind = :comment
+ encoder.text_token match, :comment
- elsif bol? && scan(/ \#!.* /x)
- kind = :doctype
+ elsif bol? && match = scan(/ \#!.* /x)
+ encoder.text_token match, :doctype
- elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
+ elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
after_def = value_expected = false
- kind = :include
+ encoder.text_token match, :include
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
@@ -93,16 +90,17 @@ module Scanners
import_clause = match == 'import'
after_def = true if match == 'def'
end
+ encoder.text_token match, kind
- elsif scan(/;/)
+ elsif match = scan(/;/)
import_clause = after_def = false
value_expected = true
- kind = :operator
+ encoder.text_token match, :operator
- elsif scan(/\{/)
+ elsif match = scan(/\{/)
class_name_follows = after_def = false
value_expected = true
- kind = :operator
+ encoder.text_token match, :operator
if !inline_block_stack.empty?
inline_block_paren_depth += 1
end
@@ -113,155 +111,146 @@ module Scanners
value_expected = true
value_expected = :regexp if match == '~'
after_def = false
- kind = :operator
+ encoder.text_token match, :operator
elsif match = scan(/ [)\]}] /x)
value_expected = after_def = false
if !inline_block_stack.empty? && match == '}'
inline_block_paren_depth -= 1
if inline_block_paren_depth == 0 # closing brace of inline block reached
- tokens << [match, :inline_delimiter]
- tokens << [:close, :inline]
+ encoder.text_token match, :inline_delimiter
+ encoder.end_group :inline
state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
next
end
end
- kind = :operator
+ encoder.text_token match, :operator
elsif check(/[\d.]/)
after_def = value_expected = false
- if scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
- elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
- kind = :oct
- elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
- kind = :float
- elsif scan(/\d+[lLgG]?/)
- kind = :integer
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :oct
+ elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+[lLgG]?/)
+ encoder.text_token match, :integer
end
-
+
elsif match = scan(/'''|"""/)
after_def = value_expected = false
state = :multiline_string
- tokens << [:open, :string]
+ encoder.begin_group :string
string_delimiter = match
- kind = :delimiter
-
+ encoder.text_token match, :delimiter
+
# TODO: record.'name' syntax
elsif match = scan(/["']/)
after_def = value_expected = false
state = match == '/' ? :regexp : :string
- tokens << [:open, state]
+ encoder.begin_group state
string_delimiter = match
- kind = :delimiter
-
- elsif value_expected && (match = scan(/\//))
+ encoder.text_token match, :delimiter
+
+ elsif value_expected && match = scan(/\//)
after_def = value_expected = false
- tokens << [:open, :regexp]
+ encoder.begin_group :regexp
state = :regexp
string_delimiter = '/'
- kind = :delimiter
-
- elsif scan(/ @ #{IDENT} /ox)
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ @ #{IDENT} /ox)
after_def = value_expected = false
- kind = :annotation
-
- elsif scan(/\//)
+ encoder.text_token match, :annotation
+
+ elsif match = scan(/\//)
after_def = false
value_expected = true
- kind = :operator
-
+ encoder.text_token match, :operator
+
else
- getch
- kind = :error
-
+ encoder.text_token getch, :error
+
end
-
+
when :string, :regexp, :multiline_string
- if scan(STRING_CONTENT_PATTERN[string_delimiter])
- kind = :content
+ if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+ encoder.text_token match, :content
elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
- tokens << [match, :delimiter]
+ encoder.text_token match, :delimiter
if state == :regexp
# TODO: regexp modifiers? s, m, x, i?
modifiers = scan(/[ix]+/)
- tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
end
state = :string if state == :multiline_string
- tokens << [:close, state]
+ encoder.end_group state
string_delimiter = nil
after_def = value_expected = false
state = :initial
next
-
+
elsif (state == :string || state == :multiline_string) &&
(match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
- kind = :content
+ encoder.text_token match, :content
else
- kind = :char
+ encoder.text_token match, :char
end
- elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
-
+ elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+
elsif match = scan(/ \$ #{IDENT} /mox)
- tokens << [:open, :inline]
- tokens << ['$', :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token '$', :inline_delimiter
match = match[1..-1]
- tokens << [match, IDENT_KIND[match]]
- tokens << [:close, :inline]
+ encoder.text_token match, IDENT_KIND[match]
+ encoder.end_group :inline
next
elsif match = scan(/ \$ \{ /x)
- tokens << [:open, :inline]
- tokens << ['${', :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token match, :inline_delimiter
inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
inline_block_paren_depth = 1
state = :initial
next
-
- elsif scan(/ \$ /mx)
- kind = :content
-
- elsif scan(/ \\. /mx)
- kind = :content
-
- elsif scan(/ \\ | \n /x)
- tokens << [:close, state]
- kind = :error
+
+ elsif match = scan(/ \$ /mx)
+ encoder.text_token match, :content
+
+ elsif match = scan(/ \\. /mx)
+ encoder.text_token match, :content # FIXME: Shouldn't this be :error?
+
+ elsif match = scan(/ \\ | \n /x)
+ encoder.end_group state
+ encoder.text_token match, :error
after_def = value_expected = false
state = :initial
-
+
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
+
end
-
+
else
- raise_inspect 'Unknown state', tokens
-
- end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
+ raise_inspect 'Unknown state', encoder
+
end
- raise_inspect 'Empty token', tokens unless match
last_token = match unless [:space, :comment, :doctype].include? kind
- tokens << [match, kind]
-
end
-
+
if [:multiline_string, :string, :regexp].include? state
- tokens << [:close, state]
+ encoder.end_group state
end
-
- tokens
+
+ encoder
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 52c7520..8f71e0e 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -53,135 +53,125 @@ module Scanners
@state = :initial
@plain_string_content = nil
end
-
- def scan_tokens tokens, options
-
+
+ def scan_tokens encoder, options
+
state = @state
plain_string_content = @plain_string_content
-
+
until eos?
-
- kind = nil
- match = nil
-
- if scan(/\s+/m)
- kind = :space
-
+
+ if match = scan(/\s+/m)
+ encoder.text_token match, :space
+
else
-
+
case state
-
+
when :initial
- if scan(/<!--.*?-->/m)
- kind = :comment
- elsif scan(/<!DOCTYPE.*?>/m)
- kind = :doctype
- elsif scan(/<\?xml.*?\?>/m)
- kind = :preprocessor
- elsif scan(/<\?.*?\?>|<%.*?%>/m)
- kind = :comment
- elsif scan(/<\/[-\w.:]*>/m)
- kind = :tag
+ if match = scan(/<!--.*?-->/m)
+ encoder.text_token match, :comment
+ elsif match = scan(/<!DOCTYPE.*?>/m)
+ encoder.text_token match, :doctype
+ elsif match = scan(/<\?xml.*?\?>/m)
+ encoder.text_token match, :preprocessor
+ elsif match = scan(/<\?.*?\?>|<%.*?%>/m)
+ encoder.text_token match, :comment
+ elsif match = scan(/<\/[-\w.:]*>/m)
+ encoder.text_token match, :tag
elsif match = scan(/<[-\w.:]+>?/m)
- kind = :tag
+ encoder.text_token match, :tag
state = :attribute unless match[-1] == ?>
- elsif scan(/[^<>&]+/)
- kind = :plain
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/[<>&]/)
- kind = :error
+ elsif match = scan(/[^<>&]+/)
+ encoder.text_token match, :plain
+ elsif match = scan(/#{ENTITY}/ox)
+ encoder.text_token match, :entity
+ elsif match = scan(/[<>&]/)
+ encoder.text_token match, :error
else
- raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
+ raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
end
-
+
when :attribute
- if scan(/#{TAG_END}/)
- kind = :tag
+ if match = scan(/#{TAG_END}/)
+ encoder.text_token match, :tag
state = :initial
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
+ elsif match = scan(/#{ATTR_NAME}/o)
+ encoder.text_token match, :attribute_name
state = :attribute_equal
else
- kind = :error
- getch
+ encoder.text_token getch, :error
end
-
+
when :attribute_equal
- if scan(/=/)
- kind = :operator
+ if match = scan(/=/)
+ encoder.text_token match, :operator
state = :attribute_value
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
- elsif scan(/#{TAG_END}/o)
- kind = :tag
+ elsif match = scan(/#{ATTR_NAME}/o)
+ encoder.text_token match, :attribute_name
+ elsif match = scan(/#{TAG_END}/o)
+ encoder.text_token match, :tag
state = :initial
- elsif scan(/./)
- kind = :error
+ else
+ encoder.text_token getch, :error
state = :attribute
end
-
+
when :attribute_value
- if scan(/#{ATTR_NAME}/o)
- kind = :attribute_value
+ if match = scan(/#{ATTR_NAME}/o)
+ encoder.text_token match, :attribute_value
state = :attribute
elsif match = scan(/["']/)
- tokens << [:open, :string]
+ encoder.begin_group :string
state = :attribute_value_string
plain_string_content = PLAIN_STRING_CONTENT[match]
- kind = :delimiter
+ encoder.text_token match, :delimiter
elsif scan(/#{TAG_END}/o)
- kind = :tag
+ encoder.text_token matched, :tag
state = :initial
else
- kind = :error
- getch
+ encoder.text_token getch, :error
end
-
+
when :attribute_value_string
- if scan(plain_string_content)
- kind = :content
- elsif scan(/['"]/)
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ if match = scan(plain_string_content)
+ encoder.text_token match, :content
+ elsif match = scan(/['"]/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :attribute
- next
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/&/)
- kind = :content
- elsif scan(/[\n>]/)
- tokens << [:close, :string]
- kind = :error
+ elsif match = scan(/#{ENTITY}/ox)
+ encoder.text_token match, :entity
+ elsif match = scan(/&/)
+ encoder.text_token match, :content
+ elsif match = scan(/[\n>]/)
+ encoder.end_group :string
state = :initial
+ encoder.text_token match, :error
end
-
+
else
- raise_inspect 'Unknown state: %p' % [state], tokens
-
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
end
-
+
end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
+
end
-
+
if options[:keep_state]
@state = state
@plain_string_content = plain_string_content
+ else
+ if state == :attribute_value_string
+ encoder.end_group :string
+ end
end
-
- tokens
+
+ encoder
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/scanners/java.rb b/lib/coderay/scanners/java.rb
index e4a7421..e7becda 100644
--- a/lib/coderay/scanners/java.rb
+++ b/lib/coderay/scanners/java.rb
@@ -48,7 +48,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
string_delimiter = nil
@@ -58,23 +58,20 @@ module Scanners
until eos?
- kind = nil
- match = nil
-
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
- tokens << [match, :space]
+ encoder.text_token match, :space
next
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- tokens << [match, :comment]
+ encoder.text_token match, :comment
next
- elsif package_name_expected && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
- kind = package_name_expected
+ elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
+ encoder.text_token match, package_name_expected
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
@@ -93,92 +90,82 @@ module Scanners
class_name_follows = true
end
end
+ encoder.text_token match, kind
- elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
- kind = :operator
+ elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
+ encoder.text_token match, :operator
- elsif scan(/;/)
+ elsif match = scan(/;/)
package_name_expected = false
- kind = :operator
+ encoder.text_token match, :operator
- elsif scan(/\{/)
+ elsif match = scan(/\{/)
class_name_follows = false
- kind = :operator
+ encoder.text_token match, :operator
elsif check(/[\d.]/)
- if scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
- elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
- kind = :oct
- elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
- kind = :float
- elsif scan(/\d+[lL]?/)
- kind = :integer
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :oct
+ elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+[lL]?/)
+ encoder.text_token match, :integer
end
elsif match = scan(/["']/)
- tokens << [:open, :string]
state = :string
+ encoder.begin_group state
string_delimiter = match
- kind = :delimiter
+ encoder.text_token match, :delimiter
- elsif scan(/ @ #{IDENT} /ox)
- kind = :annotation
+ elsif match = scan(/ @ #{IDENT} /ox)
+ encoder.text_token match, :annotation
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
when :string
- if scan(STRING_CONTENT_PATTERN[string_delimiter])
- kind = :content
+ if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+ encoder.text_token match, :content
elsif match = scan(/["'\/]/)
- tokens << [match, :delimiter]
- tokens << [:close, state]
- string_delimiter = nil
+ encoder.text_token match, :delimiter
+ encoder.end_group state
state = :initial
- next
+ string_delimiter = nil
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
- kind = :content
+ encoder.text_token match, :content
else
- kind = :char
+ encoder.text_token match, :char
end
- elsif scan(/\\./m)
- kind = :content
- elsif scan(/ \\ | $ /x)
- tokens << [:close, state]
- kind = :error
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group state
state = :initial
+ encoder.text_token match, :error
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
else
- raise_inspect 'Unknown state', tokens
+ raise_inspect 'Unknown state', encoder
end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
last_token_dot = match == '.'
- tokens << [match, kind]
-
end
if state == :string
- tokens << [:close, state]
+ encoder.end_group state
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb
index 92ac005..3ae8d80 100644
--- a/lib/coderay/scanners/java_script.rb
+++ b/lib/coderay/scanners/java_script.rb
@@ -5,12 +5,12 @@ module Scanners
#
# Aliases: +ecmascript+, +ecma_script+, +javascript+
class JavaScript < Scanner
-
+
include Streamable
-
+
register_for :java_script
file_extension 'js'
-
+
# The actual JavaScript keywords.
KEYWORDS = %w[
break case catch continue default delete do else
@@ -40,7 +40,7 @@ module Scanners
add(PREDEFINED_CONSTANTS, :pre_constant).
add(MAGIC_VARIABLES, :local_variable).
add(KEYWORDS, :keyword) # :nodoc:
-
+
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
@@ -56,47 +56,43 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
string_delimiter = nil
value_expected = true
key_expected = false
function_expected = false
-
+
until eos?
-
- kind = nil
- match = nil
case state
-
+
when :initial
-
+
if match = scan(/ \s+ | \\\n /x)
value_expected = true if !value_expected && match.index(?\n)
- tokens << [match, :space]
- next
-
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ encoder.text_token match, :space
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
- kind = :comment
-
+ encoder.text_token match, :comment
+
elsif check(/\.?\d/)
key_expected = value_expected = false
- if scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
- elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
- kind = :oct
- elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
- kind = :float
- elsif scan(/\d+/)
- kind = :integer
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :oct
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
end
-
+
elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
# FIXME: scan over nested tags
- xml_scanner.tokenize match
+ xml_scanner.tokenize match, :tokens => encoder
value_expected = false
next
@@ -105,12 +101,12 @@ module Scanners
last_operator = match[-1]
key_expected = (last_operator == ?{) || (last_operator == ?,)
function_expected = false
- kind = :operator
-
- elsif scan(/ [)\]}]+ /x)
+ encoder.text_token match, :operator
+
+ elsif match = scan(/ [)\]}]+ /x)
function_expected = key_expected = value_expected = false
- kind = :operator
-
+ encoder.text_token match, :operator
+
elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
@@ -128,101 +124,91 @@ module Scanners
end
function_expected = (kind == :keyword) && (match == 'function')
key_expected = false
-
+ encoder.text_token match, kind
+
elsif match = scan(/["']/)
if key_expected && check(KEY_CHECK_PATTERN[match])
state = :key
else
state = :string
end
- tokens << [:open, state]
+ encoder.begin_group state
string_delimiter = match
- kind = :delimiter
-
+ encoder.text_token match, :delimiter
+
elsif value_expected && (match = scan(/\/(?=\S)/))
- tokens << [:open, :regexp]
+ encoder.begin_group :regexp
state = :regexp
string_delimiter = '/'
- kind = :delimiter
-
- elsif scan(/ \/ /x)
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
value_expected = true
key_expected = false
- kind = :operator
-
+ encoder.text_token match, :operator
+
else
- getch
- kind = :error
-
+ encoder.text_token getch, :error
+
end
-
+
when :string, :regexp, :key
- if scan(STRING_CONTENT_PATTERN[string_delimiter])
- kind = :content
+ if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+ encoder.text_token match, :content
elsif match = scan(/["'\/]/)
- tokens << [match, :delimiter]
+ encoder.text_token match, :delimiter
if state == :regexp
modifiers = scan(/[gim]+/)
- tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
end
- tokens << [:close, state]
+ encoder.end_group state
string_delimiter = nil
key_expected = value_expected = false
state = :initial
- next
elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
- kind = :content
+ encoder.text_token match, :content
else
- kind = :char
+ encoder.text_token match, :char
end
- elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/\\./m)
- kind = :content
- elsif scan(/ \\ | $ /x)
- tokens << [:close, state]
- kind = :error
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group state
+ encoder.text_token match, :error
key_expected = value_expected = false
state = :initial
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
-
+
else
- raise_inspect 'Unknown state', tokens
-
- end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
+ raise_inspect 'Unknown state', encoder
+
end
- raise_inspect 'Empty token', tokens unless match
- tokens << [match, kind]
-
end
-
+
if [:string, :regexp].include? state
- tokens << [:close, state]
+ encoder.end_group state
end
-
- tokens
+
+ encoder
end
-
+
protected
-
+
def reset_instance
super
@xml_scanner.reset if defined? @xml_scanner
end
-
+
def xml_scanner
@xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
end
-
+
end
end
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index ca74ff3..668fd82 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -19,7 +19,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
stack = []
@@ -27,82 +27,67 @@ module Scanners
until eos?
- kind = nil
- match = nil
-
case state
when :initial
- if match = scan(/ \s+ | \\\n /x)
- tokens << [match, :space]
- next
+ if match = scan(/ \s+ /x)
+ encoder.text_token match, :space
+ elsif match = scan(/"/)
+ state = key_expected ? :key : :string
+ encoder.begin_group state
+ encoder.text_token match, :delimiter
elsif match = scan(/ [:,\[{\]}] /x)
- kind = :operator
+ encoder.text_token match, :operator
case match
- when '{' then stack << :object; key_expected = true
- when '[' then stack << :array
when ':' then key_expected = false
when ',' then key_expected = true if stack.last == :object
+ when '{' then stack << :object; key_expected = true
+ when '[' then stack << :array
when '}', ']' then stack.pop # no error recovery, but works for valid JSON
end
elsif match = scan(/ true | false | null /x)
- kind = :value
- elsif match = scan(/-?(?:0|[1-9]\d*)/)
+ encoder.text_token match, :value
+ elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
kind = :integer
- if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
+ if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
match << matched
kind = :float
end
- elsif match = scan(/"/)
- state = key_expected ? :key : :string
- tokens << [:open, state]
- kind = :delimiter
+ encoder.text_token match, kind
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
when :string, :key
- if scan(/[^\\"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, state]
+ if match = scan(/[^\\"]+/)
+ encoder.text_token match, :content
+ elsif match = scan(/"/)
+ encoder.text_token match, :delimiter
+ encoder.end_group state
state = :initial
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/\\./m)
- kind = :content
- elsif scan(/ \\ | $ /x)
- tokens << [:close, state]
- kind = :error
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group state
+ encoder.text_token match, :error
state = :initial
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
else
- raise_inspect 'Unknown state', tokens
+ raise_inspect 'Unknown state', encoder
end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
end
if [:string, :key].include? state
- tokens << [:close, state]
+ encoder.end_group state
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb
index fe6b303..ba8ee71 100644
--- a/lib/coderay/scanners/nitro_xhtml.rb
+++ b/lib/coderay/scanners/nitro_xhtml.rb
@@ -1,14 +1,14 @@
module CodeRay
module Scanners
-
+
load :html
load :ruby
-
+
# Nitro XHTML Scanner
#
# Alias: +nitro+
class NitroXHTML < Scanner
-
+
include Streamable
register_for :nitro_xhtml
file_extension :xhtml
@@ -38,7 +38,7 @@ module Scanners
)
(?: %> )?
/mx # :nodoc:
-
+
NITRO_VALUE_BLOCK = /
\#
(?:
@@ -55,83 +55,83 @@ module Scanners
| \\ [^\\]* \\?
)
/x # :nodoc:
-
+
NITRO_ENTITY = /
% (?: \#\d+ | \w+ ) ;
/ # :nodoc:
-
+
START_OF_RUBY = /
(?=[<\#%])
< (?: \?r | % | ruby> )
| \# [{(|]
| % (?: \#\d+ | \w+ ) ;
/x # :nodoc:
-
+
CLOSING_PAREN = Hash.new { |h, p| h[p] = p } # :nodoc:
CLOSING_PAREN.update( {
'(' => ')',
'[' => ']',
'{' => '}',
} )
-
+
protected
-
+
def setup
@ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
end
-
+
def reset_instance
super
@html_scanner.reset
end
-
- def scan_tokens tokens, options
-
+
+ def scan_tokens encoder, options
+
until eos?
-
- if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
+
+ if (match = scan_until(/(?=#{START_OF_RUBY})/o) || match = scan_until(/\z/)) and not match.empty?
@html_scanner.tokenize match
-
+
elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
start_tag = match[0,2]
delimiter = CLOSING_PAREN[start_tag[1,1]]
end_tag = match[-1,1] == delimiter ? delimiter : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token start_tag, :inline_delimiter
code = match[start_tag.size .. -1 - end_tag.size]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
+ @ruby_scanner.tokenize code, :tokens => encoder
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
+ encoder.end_group :inline
+
elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
start_tag = '<?r'
end_tag = match[-2,2] == '?>' ? '?>' : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token start_tag, :inline_delimiter
code = match[start_tag.size .. -(end_tag.size)-1]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
+ @ruby_scanner.tokenize code, :tokens => encoder
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
+ encoder.end_group :inline
+
elsif entity = scan(/#{NITRO_ENTITY}/o)
- tokens << [entity, :entity]
-
+ encoder.text_token entity, :entity
+
elsif scan(/%/)
- tokens << [matched, :error]
-
+ encoder.text_token matched, :error
+
else
- raise_inspect 'else-case reached!', tokens
+ raise_inspect 'else-case reached!', encoder
end
-
+
end
-
- tokens
-
+
+ encoder
+
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index 289e795..67bb233 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -230,7 +230,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
if check(RE::PHP_START) || # starts with <?
(match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
@@ -252,29 +252,24 @@ module Scanners
until eos?
- match = nil
- kind = nil
-
case states.last
when :initial # HTML
- if scan RE::PHP_START
- kind = :inline_delimiter
+ if match = scan(RE::PHP_START)
+ encoder.text_token match, :inline_delimiter
label_expected = true
states << :php
else
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
@html_scanner.tokenize match unless match.empty?
- next
end
when :php
if match = scan(/\s+/)
- tokens << [match, :space]
- next
+ encoder.text_token match, :space
- elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
- kind = :comment
+ elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
+ encoder.text_token match, :comment
elsif match = scan(RE::IDENTIFIER)
kind = Words::IDENT_KIND[match]
@@ -299,77 +294,68 @@ module Scanners
next
end
end
+ encoder.text_token match, kind
- elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
+ elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
label_expected = false
- kind = :float
+ encoder.text_token match, :float
- elsif scan(/0x[0-9a-fA-F]+/)
+ elsif match = scan(/0x[0-9a-fA-F]+/)
label_expected = false
- kind = :hex
+ encoder.text_token match, :hex
- elsif scan(/\d+/)
+ elsif match = scan(/\d+/)
label_expected = false
- kind = :integer
-
- elsif scan(/'/)
- tokens << [:open, :string]
- if modifier
- tokens << [modifier, :modifier]
- modifier = nil
- end
- kind = :delimiter
- states.push :sqstring
+ encoder.text_token match, :integer
- elsif match = scan(/["`]/)
- tokens << [:open, :string]
+ elsif match = scan(/['"`]/)
+ encoder.begin_group :string
if modifier
- tokens << [modifier, :modifier]
+ encoder.text_token modifier, :modifier
modifier = nil
end
delimiter = match
- kind = :delimiter
- states.push :dqstring
+ encoder.text_token match, :delimiter
+ states.push match == "'" ? :sqstring : :dqstring
elsif match = scan(RE::VARIABLE)
label_expected = false
- kind = Words::VARIABLE_KIND[match]
+ encoder.text_token match, Words::VARIABLE_KIND[match]
- elsif scan(/\{/)
- kind = :operator
+ elsif match = scan(/\{/)
+ encoder.text_token match, :operator
label_expected = true
states.push :php
- elsif scan(/\}/)
+ elsif match = scan(/\}/)
if states.size == 1
- kind = :error
+ encoder.text_token match, :error
else
states.pop
if states.last.is_a?(::Array)
delimiter = states.last[1]
states[-1] = states.last[0]
- tokens << [matched, :delimiter]
- tokens << [:close, :inline]
- next
+ encoder.text_token match, :delimiter
+ encoder.end_group :inline
else
- kind = :operator
+ encoder.text_token match, :operator
label_expected = true
end
end
- elsif scan(/@/)
+ elsif match = scan(/@/)
label_expected = false
- kind = :exception
+ encoder.text_token match, :exception
- elsif scan RE::PHP_END
- kind = :inline_delimiter
+ elsif match = scan(RE::PHP_END)
+ encoder.text_token match, :inline_delimiter
states = [:initial]
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
- tokens << [:open, :string]
+ encoder.begin_group :string
warn 'heredoc in heredoc?' if heredoc_delimiter
heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
- kind = :delimiter
+ encoder.text_token match, :delimiter
states.push self[3] ? :sqstring : :dqstring
heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
@@ -379,152 +365,141 @@ module Scanners
label_expected = true if match == ':'
case_expected = false
end
- kind = :operator
+ encoder.text_token match, :operator
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
when :sqstring
- if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
- kind = :content
- elsif !heredoc_delimiter && scan(/'/)
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
+ encoder.text_token match, :content
+ elsif !heredoc_delimiter && match = scan(/'/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
- next
elsif heredoc_delimiter && match = scan(/\n/)
- kind = :content
if scan heredoc_delimiter
- tokens << ["\n", :content]
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ encoder.text_token "\n", :content
+ encoder.text_token matched, :delimiter
+ encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
- next
+ else
+ encoder.text_token match, :content
end
- elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
- kind = :char
- elsif scan(/\\./m)
- kind = :content
- elsif scan(/\\/)
- kind = :error
+ elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/\\/)
+ encoder.text_token match, :error
+ else
+ states.pop
end
when :dqstring
- if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
- kind = :content
- elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
+ encoder.text_token match, :content
+ elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
- next
elsif heredoc_delimiter && match = scan(/\n/)
- kind = :content
if scan heredoc_delimiter
- tokens << ["\n", :content]
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ encoder.text_token "\n", :content
+ encoder.text_token matched, :delimiter
+ encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
- next
+ else
+ encoder.text_token match, :content
end
- elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
- kind = :char
- elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
- kind = :char
- elsif scan(/\\./m)
- kind = :content
- elsif scan(/\\/)
- kind = :error
+ elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
+ encoder.text_token match, :char
+ elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/\\/)
+ encoder.text_token match, :error
elsif match = scan(/#{RE::VARIABLE}/o)
- kind = :local_variable
if check(/\[#{RE::IDENTIFIER}\]/o)
- tokens << [:open, :inline]
- tokens << [match, :local_variable]
- tokens << [scan(/\[/), :operator]
- tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
- tokens << [scan(/\]/), :operator]
- tokens << [:close, :inline]
- next
+ encoder.begin_group :inline
+ encoder.text_token match, :local_variable
+ encoder.text_token scan(/\[/), :operator
+ encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
+ encoder.text_token scan(/\]/), :operator
+ encoder.end_group :inline
elsif check(/\[/)
match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
- kind = :error
+ encoder.text_token match, :error
elsif check(/->#{RE::IDENTIFIER}/o)
- tokens << [:open, :inline]
- tokens << [match, :local_variable]
- tokens << [scan(/->/), :operator]
- tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
- tokens << [:close, :inline]
- next
+ encoder.begin_group :inline
+ encoder.text_token match, :local_variable
+ encoder.text_token scan(/->/), :operator
+ encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
+ encoder.end_group :inline
elsif check(/->/)
match << scan(/->/)
- kind = :error
+ encoder.text_token match, :error
+ else
+ encoder.text_token match, :local_variable
end
elsif match = scan(/\{/)
if check(/\$/)
- kind = :delimiter
+ encoder.begin_group :inline
states[-1] = [states.last, delimiter]
delimiter = nil
states.push :php
- tokens << [:open, :inline]
+ encoder.text_token match, :delimiter
else
- kind = :string
+ encoder.text_token match, :string
end
- elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
- kind = :local_variable
- elsif scan(/\$/)
- kind = :content
+ elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
+ encoder.text_token match, :local_variable
+ elsif match = scan(/\$/)
+ encoder.text_token match, :content
+ else
+ states.pop
end
when :class_expected
- if scan(/\s+/)
- kind = :space
+ if match = scan(/\s+/)
+ encoder.text_token match, :space
elsif match = scan(/#{RE::IDENTIFIER}/o)
- kind = :class
+ encoder.text_token match, :class
states.pop
else
states.pop
- next
end
when :function_expected
- if scan(/\s+/)
- kind = :space
- elsif scan(/&/)
- kind = :operator
+ if match = scan(/\s+/)
+ encoder.text_token match, :space
+ elsif match = scan(/&/)
+ encoder.text_token match, :operator
elsif match = scan(/#{RE::IDENTIFIER}/o)
- kind = :function
+ encoder.text_token match, :function
states.pop
else
states.pop
- next
end
else
- raise_inspect 'Unknown state!', tokens, states
- end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, states
+ raise_inspect 'Unknown state!', encoder, states
end
- raise_inspect 'Empty token', tokens, states unless match
-
- tokens << [match, kind]
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb
index b8db721..e176403 100644
--- a/lib/coderay/scanners/plaintext.rb
+++ b/lib/coderay/scanners/plaintext.rb
@@ -17,8 +17,9 @@ module Scanners
protected
- def scan_tokens tokens, options
- tokens << [string, :plain]
+ def scan_tokens encoder, options
+ encoder.text_token string, :plain
+ encoder
end
end
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index be5205e..568ed57 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -98,7 +98,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
string_delimiter = nil
@@ -111,37 +111,34 @@ module Scanners
until eos?
- kind = nil
- match = nil
-
if state == :string
- if scan(STRING_DELIMITER_REGEXP[string_delimiter])
- tokens << [matched, :delimiter]
- tokens << [:close, string_type]
+ if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
+ encoder.text_token match, :delimiter
+ encoder.end_group string_type
string_type = nil
state = :initial
next
- elsif string_delimiter.size == 3 && scan(/\n/)
- kind = :content
- elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
- kind = :content
- elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
- kind = :char
- elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
- kind = :char
- elsif scan(/ \\ . /x)
- kind = :content
- elsif scan(/ \\ | $ /x)
- tokens << [:close, string_type]
+ elsif string_delimiter.size == 3 && match = scan(/\n/)
+ encoder.text_token match, :content
+ elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
+ encoder.text_token match, :content
+ elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\ . /x)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group string_type
string_type = nil
- kind = :error
+ encoder.text_token match, :error
state = :initial
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
end
elsif match = scan(/ [ \t]+ | \\?\n /x)
- tokens << [match, :space]
+ encoder.text_token match, :space
if match == "\n"
state = :initial if state == :include_expected
docstring_coming = true if match?(/[ \t]*u?r?"""/)
@@ -149,28 +146,28 @@ module Scanners
next
elsif match = scan(/ \# [^\n]* /mx)
- tokens << [match, :comment]
+ encoder.text_token match, :comment
next
elsif state == :initial
- if scan(/#{OPERATOR}/o)
- kind = :operator
+ if match = scan(/#{OPERATOR}/o)
+ encoder.text_token match, :operator
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
string_delimiter = self[2]
string_type = docstring_coming ? :docstring : :string
docstring_coming = false if docstring_coming
- tokens << [:open, string_type]
+ encoder.begin_group string_type
string_raw = false
modifiers = self[1]
unless modifiers.empty?
string_raw = !!modifiers.index(?r)
- tokens << [modifiers, :modifier]
+ encoder.text_token modifiers, :modifier
match = string_delimiter
end
state = :string
- kind = :delimiter
+ encoder.text_token match, :delimiter
# TODO: backticks
@@ -186,43 +183,45 @@ module Scanners
state = DEF_NEW_STATE[match]
from_import_state << match.to_sym if state == :include_expected
end
+ encoder.text_token match, kind
- elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
- kind = :decorator
+ elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
+ encoder.text_token match, :decorator
- elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
- kind = :hex
+ elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
+ encoder.text_token match, :hex
- elsif scan(/0[bB][01]+[lL]?/)
- kind = :bin
+ elsif match = scan(/0[bB][01]+[lL]?/)
+ encoder.text_token match, :bin
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
- kind = :float
if scan(/[jJ]/)
match << matched
- kind = :imaginary
+ encoder.text_token match, :imaginary
+ else
+ encoder.text_token match, :float
end
- elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
- kind = :oct
+ elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
+ encoder.text_token match, :oct
elsif match = scan(/\d+([lL])?/)
- kind = :integer
if self[1] == nil && scan(/[jJ]/)
match << matched
- kind = :imaginary
+ encoder.text_token match, :imaginary
+ else
+ encoder.text_token match, :integer
end
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
elsif state == :def_expected
state = :initial
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
- kind = :method
+ encoder.text_token match, :method
else
next
end
@@ -230,33 +229,34 @@ module Scanners
elsif state == :class_expected
state = :initial
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
- kind = :class
+ encoder.text_token match, :class
else
next
end
elsif state == :include_expected
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
- kind = :include
if match == 'as'
- kind = :keyword
+ encoder.text_token match, :keyword
from_import_state << :as
elsif from_import_state.first == :from && match == 'import'
- kind = :keyword
+ encoder.text_token match, :keyword
from_import_state << :import
elsif from_import_state.last == :as
- # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
- kind = :ident
+ # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
+ encoder.text_token match, :ident
from_import_state.pop
elsif IDENT_KIND[match] == :keyword
unscan
match = nil
state = :initial
next
+ else
+ encoder.text_token match, :include
end
elsif match = scan(/,/)
from_import_state.pop if from_import_state.last == :as
- kind = :operator
+ encoder.text_token match, :operator
else
from_import_state = []
state = :initial
@@ -264,28 +264,19 @@ module Scanners
end
else
- raise_inspect 'Unknown state', tokens, state
+ raise_inspect 'Unknown state', encoder, state
end
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens, state unless match
-
last_token_dot = match == '.'
- tokens << [match, kind]
-
end
if state == :string
- tokens << [:close, string_type]
+ encoder.end_group string_type
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb
index 01fda8e..064a92c 100644
--- a/lib/coderay/scanners/rhtml.rb
+++ b/lib/coderay/scanners/rhtml.rb
@@ -1,18 +1,18 @@
module CodeRay
module Scanners
-
+
load :html
load :ruby
-
+
# Scanner for HTML ERB templates.
class RHTML < Scanner
-
+
include Streamable
register_for :rhtml
title 'HTML ERB Template'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
-
+
ERB_RUBY_BLOCK = /
<%(?!%)[=-]?
(?>
@@ -24,51 +24,51 @@ module Scanners
)
(?: -?%> )?
/x # :nodoc:
-
+
START_OF_ERB = /
<%(?!%)
/x # :nodoc:
-
+
protected
-
+
def setup
@ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
end
-
+
def reset_instance
super
@html_scanner.reset
end
-
- def scan_tokens tokens, options
-
+
+ def scan_tokens encoder, options
+
until eos?
-
+
if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
- @html_scanner.tokenize match
-
+ @html_scanner.tokenize match, :tokens => encoder
+
elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
start_tag = match[/\A<%[-=]?/]
end_tag = match[/-?%?>?\z/]
- tokens << [:open, :inline]
- tokens << [start_tag, :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token start_tag, :inline_delimiter
code = match[start_tag.size .. -1 - end_tag.size]
@ruby_scanner.tokenize code
- tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
+ encoder.end_group :inline
+
else
- raise_inspect 'else-case reached!', tokens
+ raise_inspect 'else-case reached!', encoder
end
-
+
end
-
- tokens
-
+
+ encoder
+
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 0e8e802..dcbfce0 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -30,7 +30,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
patterns = Patterns # avoid constant lookup
@@ -50,20 +50,18 @@ module Scanners
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
until eos?
- match = nil
- kind = nil
if state.instance_of? patterns::StringState
match = scan_until(state.pattern) || scan_until(/\z/)
- tokens << [match, :content] unless match.empty?
+ encoder.text_token match, :content unless match.empty?
break if eos?
if state.heredoc and self[1] # end of heredoc
match = getch.to_s
match << scan_until(/$/) unless eos?
- tokens << [match, :delimiter]
- tokens << [:close, state.type]
+ encoder.text_token match, :delimiter
+ encoder.end_group state.type
state = state.next_state
next
end
@@ -74,34 +72,34 @@ module Scanners
if state.paren_depth
state.paren_depth -= 1
if state.paren_depth > 0
- tokens << [match, :nesting_delimiter]
+ encoder.text_token match, :nesting_delimiter
next
end
end
- tokens << [match, :delimiter]
+ encoder.text_token match, :delimiter
if state.type == :regexp and not eos?
modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
- tokens << [modifiers, :modifier] unless modifiers.empty?
+ encoder.text_token modifiers, :modifier unless modifiers.empty?
end
- tokens << [:close, state.type]
+ encoder.end_group state.type
value_expected = false
state = state.next_state
when '\\'
if state.interpreted
if esc = scan(/ #{patterns::ESCAPE} /ox)
- tokens << [match + esc, :char]
+ encoder.text_token match + esc, :char
else
- tokens << [match, :error]
+ encoder.text_token match, :error
end
else
case m = getch
when state.delim, '\\'
- tokens << [match + m, :char]
+ encoder.text_token match + m, :char
when nil
- tokens << [match, :error]
+ encoder.text_token match, :error
else
- tokens << [match + m, :content]
+ encoder.text_token match + m, :content
end
end
@@ -113,42 +111,38 @@ module Scanners
value_expected = true
state = :initial
inline_block_curly_depth = 1
- tokens << [:open, :inline]
- tokens << [match + getch, :inline_delimiter]
+ encoder.begin_group :inline
+ encoder.text_token match + getch, :inline_delimiter
when '$', '@'
- tokens << [match, :escape]
+ encoder.text_token match, :escape
last_state = state
state = :initial
else
raise_inspect 'else-case # reached; #%p not handled' %
- [peek(1)], tokens
+ [peek(1)], encoder
end
when state.opening_paren
state.paren_depth += 1
- tokens << [match, :nesting_delimiter]
+ encoder.text_token match, :nesting_delimiter
when /#{patterns::REGEXP_SYMBOLS}/ox
- tokens << [match, :function]
+ encoder.text_token match, :function
else
raise_inspect 'else-case " reached; %p not handled, state = %p' %
- [match, state], tokens
+ [match, state], encoder
end
- next
else
if match = scan(/[ \t\f]+/)
- kind = :space
match << scan(/\s*/) unless eos? || heredocs
value_expected = true if match.index(?\n)
- tokens << [match, kind]
- next
+ encoder.text_token match, :space
elsif match = scan(/\\?\n/)
- kind = :space
if match == "\n"
value_expected = true
state = :initial if state == :undef_comma_expected
@@ -156,24 +150,20 @@ module Scanners
if heredocs
unscan # heredoc scanning needs \n at start
state = heredocs.shift
- tokens << [:open, state.type]
+ encoder.begin_group state.type
heredocs = nil if heredocs.empty?
next
else
match << scan(/\s*/) unless eos?
end
- tokens << [match, kind]
- next
+ encoder.text_token match, :space
elsif bol? && match = scan(/\#!.*/)
- tokens << [match, :doctype]
- next
+ encoder.text_token match, :doctype
elsif match = scan(/\#.*/) or
(bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o))
- kind = :comment
- tokens << [match, kind]
- next
+ encoder.text_token match, :comment
elsif state == :initial
@@ -192,16 +182,16 @@ module Scanners
value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
end
value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
+ encoder.text_token match, kind
elsif method_call_expected and
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
/#{patterns::METHOD_AFTER_DOT}/o)
- kind =
- if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/)
- :constant
- else
- :ident
- end
+ if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/)
+ encoder.text_token match, :constant
+ else
+ encoder.text_token match, :ident
+ end
method_call_expected = false
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
@@ -209,7 +199,6 @@ module Scanners
elsif not method_call_expected and match = scan(/ \.\.\.? | (\.|::) | [,\(\)\[\]\{\}] | ==?=? /x)
value_expected = match !~ / [.\)\]\}] /x || match =~ /\A\.\./
method_call_expected = self[1]
- kind = :operator
if inline_block_stack
case match
when '{'
@@ -220,35 +209,36 @@ module Scanners
state, inline_block_curly_depth, heredocs = inline_block_stack.pop
inline_block_stack = nil if inline_block_stack.empty?
heredocs = nil if heredocs && heredocs.empty?
- tokens << [match, :inline_delimiter]
- kind = :inline
- match = :close
+ encoder.text_token match, :inline_delimiter
+ encoder.end_group :inline
+ next
end
end
end
+ encoder.text_token match, :operator
elsif match = scan(/ ['"] /mx)
- tokens << [:open, :string]
- kind = :delimiter
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
state = patterns::StringState.new :string, match == '"', match # important for streaming
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
/#{patterns::INSTANCE_VARIABLE}/o)
value_expected = false
- kind = :instance_variable
+ encoder.text_token match, :instance_variable
elsif value_expected and match = scan(/\//)
- tokens << [:open, :regexp]
- kind = :delimiter
+ encoder.begin_group :regexp
+ encoder.text_token match, :delimiter
interpreted = true
state = patterns::StringState.new :regexp, interpreted, match
- elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
+ elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
if method_call_expected
- kind = :error
+ encoder.text_token match, :error
method_call_expected = false
else
- kind = self[1] ? :float : :integer
+ encoder.text_token match, self[1] ? :float : :integer
end
value_expected = false
@@ -256,28 +246,28 @@ module Scanners
/#{patterns::SYMBOL}/o)
case delim = match[1]
when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
+ encoder.begin_group :symbol
+ encoder.text_token ':', :symbol
match = delim.chr
- kind = :delimiter
+ encoder.text_token match, :delimiter
state = patterns::StringState.new :symbol, delim == ?", match
else
- kind = :symbol
+ encoder.text_token match, :symbol
value_expected = false
end
elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
value_expected = true
- kind = :operator
+ encoder.text_token match, :operator
elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
indented = self[1] == '-'
quote = self[3]
delim = self[quote ? 4 : 2]
kind = patterns::QUOTE_TO_TYPE[quote]
- tokens << [:open, kind]
- tokens << [match, :delimiter]
- match = :close
+ encoder.begin_group kind
+ encoder.text_token match, :delimiter
+ encoder.end_group kind
heredoc = patterns::StringState.new kind, quote != '\'',
delim, (indented ? :indented : :linestart )
heredocs ||= [] # create heredocs if empty
@@ -286,38 +276,38 @@ module Scanners
elsif value_expected and match = scan(/#{patterns::FANCY_START}/o)
kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
+ raise_inspect 'Unknown fancy string: %%%p' % self[1], encoder # report the key that was looked up
end
- tokens << [:open, kind]
+ encoder.begin_group kind
state = patterns::StringState.new kind, interpreted, self[2]
- kind = :delimiter
+ encoder.text_token match, :delimiter
elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
value_expected = false
- kind = :integer
+ encoder.text_token match, :integer
elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
value_expected = true
- kind = :operator
+ encoder.text_token match, :operator
elsif match = scan(/`/)
if method_call_expected
- kind = :operator
+ encoder.text_token match, :operator
value_expected = true
else
- tokens << [:open, :shell]
- kind = :delimiter
+ encoder.begin_group :shell
+ encoder.text_token match, :delimiter
state = patterns::StringState.new :shell, true, match
end
elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
/#{patterns::GLOBAL_VARIABLE}/o)
- kind = :global_variable
+ encoder.text_token match, :global_variable
value_expected = false
elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
/#{patterns::CLASS_VARIABLE}/o)
- kind = :class_variable
+ encoder.text_token match, :class_variable
value_expected = false
else
@@ -340,9 +330,9 @@ module Scanners
end
next if unicode
end
- kind = :error
- match = getch
-
+
+ encoder.text_token getch, :error
+
end
if last_state
@@ -353,34 +343,30 @@ module Scanners
elsif state == :def_expected
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- kind = :method
+ encoder.text_token match, :method
state = :initial
else
last_state = :dot_expected
state = :initial
- next
end
elsif state == :dot_expected
if match = scan(/\.|::/)
# invalid definition
state = :def_expected
- kind = :operator
+ encoder.text_token match, :operator
else
state = :initial
- next
end
elsif state == :module_expected
if match = scan(/<</)
- kind = :operator
+ encoder.text_token match, :operator
else
state = :initial
if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
- kind = :class
- else
- next
+ encoder.text_token match, :class
end
end
@@ -388,31 +374,29 @@ module Scanners
state = :undef_comma_expected
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- kind = :method
+ encoder.text_token match, :method
elsif match = scan(/#{patterns::SYMBOL}/o)
case delim = match[1]
when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
+ encoder.begin_group :symbol
+ encoder.text_token ':', :symbol
match = delim.chr
- kind = :delimiter
+ encoder.text_token match, :delimiter
state = patterns::StringState.new :symbol, delim == ?", match
state.next_state = :undef_comma_expected
else
- kind = :symbol
+ encoder.text_token match, :symbol
end
else
state = :initial
- next
end
elsif state == :undef_comma_expected
if match = scan(/,/)
- kind = :operator
+ encoder.text_token match, :operator
state = :undef_expected
else
state = :initial
- next
end
elsif state == :alias_expected
@@ -420,38 +404,30 @@ module Scanners
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
if match
- tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
- tokens << [self[2], :space]
- tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
+ encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
+ encoder.text_token self[2], :space
+ encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
end
state = :initial
- next
end
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens, state unless match
-
- tokens << [match, kind]
end
end
# cleaning up
if state.is_a? patterns::StringState
- tokens << [:close, state.type]
+ encoder.end_group state.type
end
if inline_block_stack
until inline_block_stack.empty?
state, *more = inline_block_stack.pop
- tokens << [:close, :inline] if more
- tokens << [:close, state.type]
+ encoder.end_group :inline if more
+ encoder.end_group state.type
end
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/scheme.rb b/lib/coderay/scanners/scheme.rb
index cbd9729..c29641e 100644
--- a/lib/coderay/scanners/scheme.rb
+++ b/lib/coderay/scanners/scheme.rb
@@ -72,74 +72,63 @@ module CodeRay
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
ident_kind = IDENT_KIND
until eos?
- kind = match = nil
case state
when :initial
- if scan(/ \s+ | \\\n /x)
- kind = :space
- elsif scan(/['\(\[\)\]]|#\(/)
- kind = :operator # FIXME: was :operator_fat
- elsif scan(/;.*/)
- kind = :comment
- elsif scan(/#\\(?:newline|space|.?)/)
- kind = :char
- elsif scan(/#[ft]/)
- kind = :pre_constant
- elsif scan(/#{IDENTIFIER}/o)
- kind = ident_kind[matched]
- elsif scan(/\./)
- kind = :operator
- elsif scan(/"/)
- tokens << [:open, :string]
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
+ elsif match = scan(/['\(\[\)\]]|#\(/)
+ encoder.text_token match, :operator # FIXME: was :operator_fat
+ elsif match = scan(/;.*/)
+ encoder.text_token match, :comment
+ elsif match = scan(/#\\(?:newline|space|.?)/)
+ encoder.text_token match, :char
+ elsif match = scan(/#[ft]/)
+ encoder.text_token match, :pre_constant
+ elsif match = scan(/#{IDENTIFIER}/o)
+ encoder.text_token match, ident_kind[matched]
+ elsif match = scan(/\./)
+ encoder.text_token match, :operator
+ elsif match = scan(/"/)
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
state = :string
- tokens << ['"', :delimiter]
- next
- elsif scan(/#{NUM}/o) and not matched.empty?
- kind = :integer
- elsif getch
- kind = :error
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
+ encoder.text_token match, :integer
+ else
+ encoder.text_token getch, :error
end
when :string
- if scan(/[^"\\]+/) or scan(/\\.?/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
+ if match = scan(/[^"\\]+|\\.?/)
+ encoder.text_token match, :content
+ elsif match = scan(/"/)
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :initial
- next
else
raise_inspect "else case \" reached; %p not handled." % peek(1),
- tokens, state
+ encoder, state
end
else
- raise "else case reached"
- end
-
- match ||= matched
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
+ raise 'else case reached'
+
end
- raise_inspect 'Empty token', tokens, state unless match
-
- tokens << [match, kind]
end
if state == :string
- tokens << [:close, :string]
+ encoder.end_group state
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
index 3aeea77..d62a2c3 100644
--- a/lib/coderay/scanners/sql.rb
+++ b/lib/coderay/scanners/sql.rb
@@ -51,7 +51,7 @@ module CodeRay module Scanners
STRING_PREFIXES = /[xnb]|_\w+/i
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
state = :initial
string_type = nil
@@ -59,54 +59,50 @@ module CodeRay module Scanners
until eos?
- kind = nil
- match = nil
-
if state == :initial
- if scan(/ \s+ | \\\n /x)
- kind = :space
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
- elsif scan(/^(?:--\s?|#).*/)
- kind = :comment
+ elsif match = scan(/^(?:--\s?|#).*/)
+ encoder.text_token match, :comment
- elsif scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
- kind = self[1] ? :directive : :comment
+ elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
+ encoder.text_token match, self[1] ? :directive : :comment
- elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
- kind = :operator
+ elsif match = scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
+ encoder.text_token match, :operator
- elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
+ elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o)
prefix = self[1]
string_type = self[2]
- tokens << [:open, :string]
- tokens << [prefix, :modifier] if prefix
+ encoder.begin_group :string
+ encoder.text_token prefix, :modifier if prefix
match = string_type
state = :string
- kind = :delimiter
+ encoder.text_token match, :delimiter
elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
# FIXME: Don't match keywords after "."
- kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]
+ encoder.text_token match, match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]
- elsif scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
+ elsif match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
- elsif scan(/0[0-7]+(?![89.eEfF])/)
- kind = :oct
+ elsif match = scan(/0[0-7]+(?![89.eEfF])/)
+ encoder.text_token match, :oct
- elsif scan(/(?>\d+)(?![.eEfF])/)
- kind = :integer
+ elsif match = scan(/(?>\d+)(?![.eEfF])/)
+ encoder.text_token match, :integer
- elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
- kind = :float
+ elsif match = scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+ encoder.text_token match, :float
- elsif scan(/\\N/)
- kind = :pre_constant
+ elsif match = scan(/\\N/)
+ encoder.text_token match, :pre_constant
else
- getch
- kind = :error
+ encoder.text_token getch, :error
end
@@ -121,54 +117,48 @@ module CodeRay module Scanners
next
end
unless string_content.empty?
- tokens << [string_content, :content]
+ encoder.text_token string_content, :content
string_content = ''
end
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
state = :initial
string_type = nil
- next
else
string_content << match
end
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
unless string_content.empty?
- tokens << [string_content, :content]
+ encoder.text_token string_content, :content
string_content = ''
end
- kind = :char
+ encoder.text_token match, :char
elsif match = scan(/ \\ . /mox)
string_content << match
next
- elsif scan(/ \\ | $ /x)
+ elsif match = scan(/ \\ | $ /x)
unless string_content.empty?
- tokens << [string_content, :content]
+ encoder.text_token string_content, :content
string_content = ''
end
- kind = :error
+ encoder.text_token match, :error
state = :initial
else
- raise "else case \" reached; %p not handled." % peek(1), tokens
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
end
else
- raise 'else-case reached', tokens
+ raise_inspect 'else-case reached', encoder, state
end
- match ||= matched
- unless kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
end
- tokens
+
+ if state == :string
+ encoder.end_group state
+ end
+
+ encoder
end
diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb
index 62a6aba..3c3928f 100644
--- a/lib/coderay/scanners/yaml.rb
+++ b/lib/coderay/scanners/yaml.rb
@@ -13,7 +13,7 @@ module Scanners
protected
- def scan_tokens tokens, options
+ def scan_tokens encoder, options
value_expected = nil
state = :initial
@@ -21,50 +21,48 @@ module Scanners
until eos?
- kind = nil
- match = nil
key_indent = nil if bol?
if match = scan(/ +[\t ]*/)
- kind = :space
+ encoder.text_token match, :space
elsif match = scan(/\n+/)
- kind = :space
+ encoder.text_token match, :space
state = :initial if match.index(?\n)
elsif match = scan(/#.*/)
- kind = :comment
+ encoder.text_token match, :comment
elsif bol? and case
when match = scan(/---|\.\.\./)
- tokens << [:open, :head]
- tokens << [match, :head]
- tokens << [:close, :head]
+ encoder.begin_group :head
+ encoder.text_token match, :head
+ encoder.end_group :head
next
when match = scan(/%.*/)
- tokens << [match, :doctype]
+ encoder.text_token match, :doctype
next
end
elsif state == :value and case
- when !check(/(?:"[^"]*")(?=: |:$)/) && scan(/"/)
- tokens << [:open, :string]
- tokens << [matched, :delimiter]
- tokens << [matched, :content] if scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
- tokens << [matched, :delimiter] if scan(/"/)
- tokens << [:close, :string]
+ when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
+ encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
+ encoder.text_token match, :delimiter if match = scan(/"/)
+ encoder.end_group :string
next
when match = scan(/[|>][-+]?/)
- tokens << [:open, :string]
- tokens << [match, :delimiter]
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
string_indent = key_indent || column(pos - match.size - 1)
- tokens << [matched, :content] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
- tokens << [:close, :string]
+ encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
+ encoder.end_group :string
next
when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
- tokens << [match, :string]
+ encoder.text_token match, :string
string_indent = key_indent || column(pos - match.size - 1)
- tokens << [matched, :string] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
+ encoder.text_token matched, :string if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
next
end
@@ -72,68 +70,69 @@ module Scanners
when match = scan(/[-:](?= |$)/)
state = :value if state == :colon && (match == ':' || match == '-')
state = :value if state == :initial && match == '-'
- kind = :operator
+ encoder.text_token match, :operator
+ next
when match = scan(/[,{}\[\]]/)
- kind = :operator
+ encoder.text_token match, :operator
+ next
when state == :initial && match = scan(/[\w.() ]*\S(?=: |:$)/)
- kind = :key
+ encoder.text_token match, :key
key_indent = column(pos - match.size - 1)
- # tokens << [key_indent.inspect, :debug]
+ # encoder.text_token key_indent.inspect, :debug
state = :colon
+ next
when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?=: |:$)/)
- tokens << [:open, :key]
- tokens << [match[0,1], :delimiter]
- tokens << [match[1..-2], :content]
- tokens << [match[-1,1], :delimiter]
- tokens << [:close, :key]
+ encoder.begin_group :key
+ encoder.text_token match[0,1], :delimiter
+ encoder.text_token match[1..-2], :content
+ encoder.text_token match[-1,1], :delimiter
+ encoder.end_group :key
key_indent = column(pos - match.size - 1)
- # tokens << [key_indent.inspect, :debug]
+ # encoder.text_token key_indent.inspect, :debug
state = :colon
next
- when scan(/(![\w\/]+)(:([\w:]+))?/)
- tokens << [self[1], :type]
+ when match = scan(/(![\w\/]+)(:([\w:]+))?/)
+ encoder.text_token self[1], :type
if self[2]
- tokens << [':', :operator]
- tokens << [self[3], :class]
+ encoder.text_token ':', :operator
+ encoder.text_token self[3], :class
end
next
- when scan(/&\S+/)
- kind = :variable
- when scan(/\*\w+/)
- kind = :global_variable
- when scan(/<</)
- kind = :class_variable
- when scan(/\d\d:\d\d:\d\d/)
- kind = :oct
- when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
- kind = :oct
- when scan(/:\w+/)
- kind = :symbol
- when scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
- kind = :error
- when scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
- kind = :error
+ when match = scan(/&\S+/)
+ encoder.text_token match, :variable
+ next
+ when match = scan(/\*\w+/)
+ encoder.text_token match, :global_variable
+ next
+ when match = scan(/<</)
+ encoder.text_token match, :class_variable
+ next
+ when match = scan(/\d\d:\d\d:\d\d/)
+ encoder.text_token match, :oct
+ next
+ when match = scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
+ encoder.text_token match, :oct
+ next
+ when match = scan(/:\w+/)
+ encoder.text_token match, :symbol
+ next
+ when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
+ encoder.text_token match, :error
+ next
+ when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
+ encoder.text_token match, :error
+ next
end
else
- getch
- kind = :error
+ raise if eos?
+ encoder.text_token getch, :error
end
- match ||= matched
-
- if $CODERAY_DEBUG and not kind
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens, state
- end
- raise_inspect 'Empty token', tokens, state unless match
-
- tokens << [match, kind]
-
end
- tokens
+ encoder
end
end
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index 3e63372..9904d50 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -79,7 +79,6 @@ module CodeRay
:plain => :NO_HIGHLIGHT,
}
AbbreviationForKind[:method] = AbbreviationForKind[:function]
- AbbreviationForKind[:open] = AbbreviationForKind[:close] = AbbreviationForKind[:delimiter]
AbbreviationForKind[:nesting_delimiter] = AbbreviationForKind[:delimiter]
AbbreviationForKind[:escape] = AbbreviationForKind[:delimiter]
AbbreviationForKind[:docstring] = AbbreviationForKind[:comment]
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 2a0dc15..c85c2f1 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,6 +1,6 @@
module CodeRay
- # = Tokens
+ # = Tokens TODO: Rewrite!
#
# The Tokens class represents a list of tokens returned from
# a Scanner.
@@ -8,7 +8,7 @@ module CodeRay
# A token is not a special object, just a two-element Array
# consisting of
# * the _token_ _text_ (the original source of the token in a String) or
- # a _token_ _action_ (:open, :close, :begin_line, :end_line)
+ # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
# A token looks like this:
@@ -18,16 +18,16 @@ module CodeRay
# ['$^', :error]
#
# Some scanners also yield sub-tokens, represented by special
- # token actions, namely :open and :close.
+ # token actions, namely begin_group and end_group.
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
- # [:open, :string],
+ # [:begin_group, :string],
# ['"', :delimiter],
# ['a string', :content],
# ['"', :delimiter],
- # [:close, :string]
+ # [:end_group, :string]
# ]
#
# Tokens is the interface between Scanners and Encoders:
@@ -47,20 +47,11 @@ module CodeRay
#
# It also allows you to generate tokens directly (without using a scanner),
# to load them from a file, and still use any Encoder that CodeRay provides.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
class Tokens < Array
# The Scanner instance that created the tokens.
attr_accessor :scanner
- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
# Iterates over all tokens.
#
# If a filter is given, only tokens of that kind are yielded.
@@ -76,7 +67,7 @@ module CodeRay
end
# Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
+ # Token actions are left out.
#
# Example:
# tokens.each_text_token { |text, kind| text.replace html_escape(text) }
@@ -117,9 +108,13 @@ module CodeRay
# For example, if you call +tokens.html+, the HTML encoder
# is used to highlight the tokens.
def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
+ encode_with meth, options
end
-
+
+ def encode_with encoder, options = {}
+ Encoders[encoder].new(options).encode_tokens self
+ end
+
# Returns the tokens compressed by joining consecutive
# tokens of the same kind.
#
@@ -158,7 +153,7 @@ module CodeRay
replace optimize
end
- # Ensure that all :open tokens have a correspondent :close one.
+ # Ensure that all begin_group tokens have a corresponding end_group.
#
# TODO: Test this!
def fix
@@ -167,15 +162,15 @@ module CodeRay
opened = []
for type, kind in self
case type
- when :open
- opened.push [:close, kind]
+ when :begin_group
+ opened.push [:end_group, kind] # remember the closer we expect, as :begin_line does
when :begin_line
opened.push [:end_line, kind]
- when :close, :end_line
+ when :end_group, :end_line
expected = opened.pop
if [type, kind] != expected
- # Unexpected :close; decide what to do based on the kind:
- # - token was never opened: delete the :close (just skip it)
+ # Unexpected end; decide what to do based on the kind:
+ # - token was never opened: delete the end (just skip it)
next unless opened.rindex expected
# - token was opened earlier: also close tokens in between
tokens << token until (token = opened.pop) == expected
@@ -230,6 +225,11 @@ module CodeRay
dump = dump.gzip gzip_level
dump.extend Undumping
end
+
+ # Return the actual number of tokens.
+ def count
+ size / 2
+ end
# The total size of the tokens.
# Should be equal to the input size before
@@ -242,9 +242,7 @@ module CodeRay
size
end
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
+ # Return all text tokens joined into a single string.
def text
map { |t, k| t if t.is_a? ::String }.join
end
@@ -271,77 +269,12 @@ module CodeRay
@dump = Marshal.load dump
end
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |text, kind|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << ['/\d+/', :regexp]
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call(*token)
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError,
- 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
+ alias text_token push
+ def begin_group kind; push :begin_group, kind end
+ def end_group kind; push :end_group, kind end
+ def begin_line kind; push :begin_line, kind end
+ def end_line kind; push :end_line, kind end
+
end
end
@@ -369,17 +302,18 @@ class TokensTest < Test::Unit::TestCase
def test_adding_tokens
tokens = CodeRay::Tokens.new
assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
+ tokens.text_token 'string', :type
+ tokens.text_token '()', :operator
end
- assert_equal tokens.size, 2
+ assert_equal 4, tokens.size
+ assert_equal 2, tokens.count
end
def test_dump_undump
tokens = CodeRay::Tokens.new
assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
+ tokens.text_token 'string', :type
+ tokens.text_token '()', :operator
end
tokens2 = nil
assert_nothing_raised do