summaryrefslogtreecommitdiff
path: root/lib/coderay/tokens.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/tokens.rb')
-rw-r--r--lib/coderay/tokens.rb99
1 files changed, 22 insertions, 77 deletions
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 6957d69..e7bffce 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,55 +1,43 @@
module CodeRay
- # GZip library for writing and reading token dumps.
- autoload :GZip, coderay_path('helpers', 'gzip')
-
- # = Tokens TODO: Rewrite!
- #
- # The Tokens class represents a list of tokens returnd from
- # a Scanner.
+ # The Tokens class represents a list of tokens returned from
+ # a Scanner. It's actually just an Array with a few helper methods.
#
- # A token is not a special object, just a two-element Array
- # consisting of
+ # A token itself is not a special object, just two elements in an Array:
# * the _token_ _text_ (the original source of the token in a String) or
# a _token_ _action_ (begin_group, end_group, begin_line, end_line)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
- # A token looks like this:
+ # It looks like this:
#
- # ['# It looks like this', :comment]
- # ['3.1415926', :float]
- # ['$^', :error]
+ # ..., '# It looks like this', :comment, ...
+ # ..., '3.1415926', :float, ...
+ # ..., '$^', :error, ...
#
# Some scanners also yield sub-tokens, represented by special
- # token actions, namely begin_group and end_group.
+ # token actions, for example :begin_group and :end_group.
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
- # [:begin_group, :string],
- # ['"', :delimiter],
- # ['a string', :content],
- # ['"', :delimiter],
- # [:end_group, :string]
+ # :begin_group, :string,
+ # '"', :delimiter,
+ # 'a string', :content,
+ # '"', :delimiter,
+ # :end_group, :string
# ]
#
- # Tokens is the interface between Scanners and Encoders:
- # The input is split and saved into a Tokens object. The Encoder
- # then builds the output from this object.
- #
- # Thus, the syntax below becomes clear:
+ # Tokens can be used to save the output of a Scanner in a simple
+ # Ruby object that can be sent to an Encoder later:
#
- # CodeRay.scan('price = 2.59', :ruby).html
- # # the Tokens object is here -------^
- #
- # See how small it is? ;)
+ # tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+ # tokens.encode(:html)
+ # tokens.html
+ # CodeRay.encoder(:html).encode_tokens(tokens)
#
# Tokens gives you the power to handle pre-scanned code very easily:
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
- # that you put in your DB.
- #
- # It also allows you to generate tokens directly (without using a scanner),
- # to load them from a file, and still use any Encoder that CodeRay provides.
+ # You can serialize it to a JSON string and store it in a database, pass it
+ # around to encode it more than once, send it to other algorithms...
class Tokens < Array
# The Scanner instance that created the tokens.
@@ -58,8 +46,7 @@ module CodeRay
# Encode the tokens using encoder.
#
# encoder can be
- # * a symbol like :html oder :statistic
- # * an Encoder class
+ # * a plugin name like :html or 'statistic'
# * an Encoder object
#
# options are passed to the encoder.
@@ -157,53 +144,11 @@ module CodeRay
parts
end
- # Dumps the object into a String that can be saved
- # in files or databases.
- #
- # The dump is created with Marshal.dump;
- # In addition, it is gzipped using GZip.gzip.
- #
- # The returned String object includes Undumping
- # so it has an #undump method. See Tokens.load.
- #
- # You can configure the level of compression,
- # but the default value 7 should be what you want
- # in most cases as it is a good compromise between
- # speed and compression rate.
- #
- # See GZip module.
- def dump gzip_level = 7
- dump = Marshal.dump self
- dump = GZip.gzip dump, gzip_level
- dump.extend Undumping
- end
-
# Return the actual number of tokens.
def count
size / 2
end
- # Include this module to give an object an #undump
- # method.
- #
- # The string returned by Tokens.dump includes Undumping.
- module Undumping
- # Calls Tokens.load with itself.
- def undump
- Tokens.load self
- end
- end
-
- # Undump the object using Marshal.load, then
- # unzip it using GZip.gunzip.
- #
- # The result is commonly a Tokens object, but
- # this is not guaranteed.
- def Tokens.load dump
- dump = GZip.gunzip dump
- @dump = Marshal.load dump
- end
-
alias text_token push
def begin_group kind; push :begin_group, kind end
def end_group kind; push :end_group, kind end