summaryrefslogtreecommitdiff
path: root/lib/coderay/tokens.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/tokens.rb')
-rw-r--r--lib/coderay/tokens.rb138
1 files changed, 36 insertions, 102 deletions
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 2a0dc15..c85c2f1 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,6 +1,6 @@
module CodeRay
- # = Tokens
+ # = Tokens TODO: Rewrite!
#
# The Tokens class represents a list of tokens returnd from
# a Scanner.
@@ -8,7 +8,7 @@ module CodeRay
# A token is not a special object, just a two-element Array
# consisting of
# * the _token_ _text_ (the original source of the token in a String) or
- # a _token_ _action_ (:open, :close, :begin_line, :end_line)
+ # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
# A token looks like this:
@@ -18,16 +18,16 @@ module CodeRay
# ['$^', :error]
#
# Some scanners also yield sub-tokens, represented by special
- # token actions, namely :open and :close.
+ # token actions, namely begin_group and end_group.
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
- # [:open, :string],
+ # [:begin_group, :string],
# ['"', :delimiter],
# ['a string', :content],
# ['"', :delimiter],
- # [:close, :string]
+ # [:end_group, :string]
# ]
#
# Tokens is the interface between Scanners and Encoders:
@@ -47,20 +47,11 @@ module CodeRay
#
# It also allows you to generate tokens directly (without using a scanner),
# to load them from a file, and still use any Encoder that CodeRay provides.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
class Tokens < Array
# The Scanner instance that created the tokens.
attr_accessor :scanner
- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
# Iterates over all tokens.
#
# If a filter is given, only tokens of that kind are yielded.
@@ -76,7 +67,7 @@ module CodeRay
end
# Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
+ # Token actions are left out.
#
# Example:
# tokens.each_text_token { |text, kind| text.replace html_escape(text) }
@@ -117,9 +108,13 @@ module CodeRay
# For example, if you call +tokens.html+, the HTML encoder
# is used to highlight the tokens.
def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
+ encode_with meth, options
end
-
+
+ def encode_with encoder, options = {}
+ Encoders[encoder].new(options).encode_tokens self
+ end
+
# Returns the tokens compressed by joining consecutive
# tokens of the same kind.
#
@@ -158,7 +153,7 @@ module CodeRay
replace optimize
end
- # Ensure that all :open tokens have a correspondent :close one.
+ # Ensure that all begin_group tokens have a correspondent end_group.
#
# TODO: Test this!
def fix
@@ -167,15 +162,15 @@ module CodeRay
opened = []
for type, kind in self
case type
- when :open
- opened.push [:close, kind]
+ when :begin_group
+ opened.push [:begin_group, kind]
when :begin_line
opened.push [:end_line, kind]
- when :close, :end_line
+ when :end_group, :end_line
expected = opened.pop
if [type, kind] != expected
- # Unexpected :close; decide what to do based on the kind:
- # - token was never opened: delete the :close (just skip it)
+ # Unexpected end; decide what to do based on the kind:
+ # - token was never opened: delete the end (just skip it)
next unless opened.rindex expected
# - token was opened earlier: also close tokens in between
tokens << token until (token = opened.pop) == expected
@@ -230,6 +225,11 @@ module CodeRay
dump = dump.gzip gzip_level
dump.extend Undumping
end
+
+ # Return the actual number of tokens.
+ def count
+ size / 2
+ end
# The total size of the tokens.
# Should be equal to the input size before
@@ -242,9 +242,7 @@ module CodeRay
size
end
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
+ # Return all text tokens joined into a single string.
def text
map { |t, k| t if t.is_a? ::String }.join
end
@@ -271,77 +269,12 @@ module CodeRay
@dump = Marshal.load dump
end
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |text, kind|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << ['/\d+/', :regexp]
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call(*token)
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError,
- 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
+ alias text_token push
+ def begin_group kind; push :begin_group, kind end
+ def end_group kind; push :end_group, kind end
+ def begin_line kind; push :begin_line, kind end
+ def end_line kind; push :end_line, kind end
+
end
end
@@ -369,17 +302,18 @@ class TokensTest < Test::Unit::TestCase
def test_adding_tokens
tokens = CodeRay::Tokens.new
assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
+ tokens.text_token 'string', :type
+ tokens.text_token '()', :operator
end
- assert_equal tokens.size, 2
+ assert_equal tokens.size, 4
+ assert_equal tokens.count, 2
end
def test_dump_undump
tokens = CodeRay::Tokens.new
assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
+ tokens.text_token 'string', :type
+ tokens.text_token '()', :operator
end
tokens2 = nil
assert_nothing_raised do