rake test now runs in debug mode.

All .rb files converted to UNIX format (where did the \r come from?)
author: murphy <murphy@rubychan.de> 2006-07-11 05:40:21 +0000
committer: murphy <murphy@rubychan.de> 2006-07-11 05:40:21 +0000
commit: 7bb2aef0553091a10c197e302475c9f14de8a860 (patch)
tree: 72ea9444276fe97dc99ae82aa9e46070ec7ba9ea /lib/coderay/tokens.rb
parent: 26a8e5a0388199ac686db28d631b05a5b5aa02e1 (diff)
download: coderay-7bb2aef0553091a10c197e302475c9f14de8a860.tar.gz
1 files changed, 322 insertions, 322 deletions
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 8b8c692..c8c62e0 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,322 +1,322 @@
-module CodeRay
-
-  # = Tokens
-  #
-  # The Tokens class represents a list of tokens returnd from
-  # a Scanner.
-  #
-  # A token is not a special object, just a two-element Array
-  # consisting of
-  # * the _token_ _kind_ (a Symbol representing the type of the token)
-  # * the _token_ _text_ (the original source of the token in a String)
-  #
-  # A token looks like this:
-  #
-  #   [:comment, '# It looks like this']
-  #   [:float, '3.1415926']
-  #   [:error, 'äöü']
-  #
-  # Some scanners also yield some kind of sub-tokens, represented by special
-  # token texts, namely :open and :close .
-  #
-  # The Ruby scanner, for example, splits "a string" into:
-  #
-  #  [
-  #   [:open, :string],
-  #   [:delimiter, '"'],
-  #   [:content, 'a string'],
-  #   [:delimiter, '"'],
-  #   [:close, :string]
-  #  ]
-  #
-  # Tokens is also the interface between Scanners and Encoders:
-  # The input is split and saved into a Tokens object. The Encoder
-  # then builds the output from this object.
-  #
-  # Thus, the syntax below becomes clear:
-  #
-  #   CodeRay.scan('price = 2.59', :ruby).html
-  #   # the Tokens object is here -------^
-  #
-  # See how small it is? ;)
-  #
-  # Tokens gives you the power to handle pre-scanned code very easily:
-  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
-  # that you put in your DB.
-  #
-  # Tokens' subclass TokenStream allows streaming to save memory.
-  class Tokens < Array
-
-    class << self
-
-      # Convert the token to a string.
-      #
-      # This format is used by Encoders.Tokens.
-      # It can be reverted using read_token.
-      def write_token text, type
-        if text.is_a? String
-          "#{type}\t#{escape(text)}\n"
-        else
-          ":#{text}\t#{type}\t\n"
-        end
-      end
-
-      # Read a token from the string.
-      #
-      # Inversion of write_token.
-      #
-      # TODO Test this!
-      def read_token token
-        type, text = token.split("\t", 2)
-        if type[0] == ?:
-          [text.to_sym, type[1..-1].to_sym]
-        else
-          [type.to_sym, unescape(text)]
-        end
-      end
-
-      # Escapes a string for use in write_token.
-      def escape text
-        text.gsub(/[\n\\]/, '\\\\\&')
-      end
-
-      # Unescapes a string created by escape.
-      def unescape text
-        text.gsub(/\\[\n\\]/) { |m| m[1,1] }
-      end
-
-    end
-
-    # Whether the object is a TokenStream.
-    #
-    # Returns false.
-    def stream?
-      false
-    end
-
-    # Iterates over all tokens.
-    #
-    # If a filter is given, only tokens of that kind are yielded.
-    def each kind_filter = nil, &block
-      unless kind_filter
-        super(&block)
-      else
-        super() do |text, kind|
-          next unless kind == kind_filter
-          yield text, kind
-        end
-      end
-    end
-
-    # Iterates over all text tokens.
-    # Range tokens like [:open, :string] are left out.
-    #
-    # Example:
-    #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
-    def each_text_token
-      each do |text, kind|
-        next unless text.respond_to? :to_str
-        yield text, kind
-      end
-    end
-
-    # Encode the tokens using encoder.
-    #
-    # encoder can be
-    # * a symbol like :html oder :statistic
-    # * an Encoder class
-    # * an Encoder object
-    #
-    # options are passed to the encoder.
-    def encode encoder, options = {}
-      unless encoder.is_a? Encoders::Encoder
-        unless encoder.is_a? Class
-          encoder_class = Encoders[encoder]
-        end
-        encoder = encoder_class.new options
-      end
-      encoder.encode_tokens self, options
-    end
-
-
-    # Turn into a string using Encoders::Text.
-    #
-    # +options+ are passed to the encoder if given.
-    def to_s options = {}
-      encode :text, options
-    end
-
-
-    # Redirects unknown methods to encoder calls.
-    #
-    # For example, if you call +tokens.html+, the HTML encoder
-    # is used to highlight the tokens.
-    def method_missing meth, options = {}
-      Encoders[meth].new(options).encode_tokens self
-    end
-
-    # Returns the tokens compressed by joining consecutive
-    # tokens of the same kind.
-    #
-    # This can not be undone, but should yield the same output
-    # in most Encoders.  It basically makes the output smaller.
-    #
-    # Combined with dump, it saves space for the cost of time.
-    #
-    # If the scanner is written carefully, this is not required -
-    # for example, consecutive //-comment lines could already be
-    # joined in one comment token by the Scanner.
-    def optimize
-      print ' Tokens#optimize: before: %d - ' % size if $DEBUG
-      last_kind = last_text = nil
-      new = self.class.new
-      each do |text, kind|
-        if text.is_a? String
-          if kind == last_kind
-            last_text << text
-          else
-            new << [last_text, last_kind] if last_kind
-            last_text = text
-            last_kind = kind
-          end
-        else
-          new << [last_text, last_kind] if last_kind
-          last_kind = last_text = nil
-          new << [text, kind]
-        end
-      end
-      new << [last_text, last_kind] if last_kind
-      print 'after: %d (%d saved = %2.0f%%)' %
-        [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
-      new
-    end
-
-    # Compact the object itself; see optimize.
-    def optimize!
-      replace optimize
-    end
-
-    # Dumps the object into a String that can be saved
-    # in files or databases.
-    #
-    # The dump is created with Marshal.dump;
-    # In addition, it is gzipped using GZip.gzip.
-    #
-    # The returned String object includes Undumping
-    # so it has an #undump method. See Tokens.load.
-    #
-    # You can configure the level of compression,
-    # but the default value 7 should be what you want
-    # in most cases as it is a good comprimise between
-    # speed and compression rate.
-    #
-    # See GZip module.
-    def dump gzip_level = 7
-      require 'coderay/helpers/gzip_simple'
-      dump = Marshal.dump self
-      dump = dump.gzip gzip_level
-      dump.extend Undumping
-    end
-
-    # The total size of the tokens.
-    # Should be equal to the input size before
-    # scanning.
-    def text_size
-      map { |t, k| t }.join.size
-    end
-
-    # Include this module to give an object an #undump
-    # method.
-    #
-    # The string returned by Tokens.dump includes Undumping.
-    module Undumping
-      # Calls Tokens.load with itself.
-      def undump
-        Tokens.load self
-      end
-    end
-
-    # Undump the object using Marshal.load, then
-    # unzip it using GZip.gunzip.
-    #
-    # The result is commonly a Tokens object, but
-    # this is not guaranteed.
-    def Tokens.load dump
-      require 'coderay/helpers/gzip_simple'
-      dump = dump.gunzip
-      @dump = Marshal.load dump
-    end
-
-  end
-
-
-  # = TokenStream
-  #
-  # The TokenStream class is a fake Array without elements.
-  #
-  # It redirects the method << to a block given at creation.
-  #
-  # This allows scanners and Encoders to use streaming (no
-  # tokens are saved, the input is highlighted the same time it
-  # is scanned) with the same code.
-  #
-  # See CodeRay.encode_stream and CodeRay.scan_stream
-  class TokenStream < Tokens
-
-    # Whether the object is a TokenStream.
-    #
-    # Returns true.
-    def stream?
-      true
-    end
-
-    # The Array is empty, but size counts the tokens given by <<.
-    attr_reader :size
-
-    # Creates a new TokenStream that calls +block+ whenever
-    # its << method is called.
-    #
-    # Example:
-    #
-    #   require 'coderay'
-    #   
-    #   token_stream = CodeRay::TokenStream.new do |kind, text|
-    #     puts 'kind: %s, text size: %d.' % [kind, text.size]
-    #   end
-    #   
-    #   token_stream << [:regexp, '/\d+/']
-    #   #-> kind: rexpexp, text size: 5.
-    #
-    def initialize &block
-      raise ArgumentError, 'Block expected for streaming.' unless block
-      @callback = block
-      @size = 0
-    end
-
-    # Calls +block+ with +token+ and increments size.
-    #
-    # Returns self.
-    def << token
-      @callback.call token
-      @size += 1
-      self
-    end
-
-    # This method is not implemented due to speed reasons. Use Tokens.
-    def text_size
-      raise NotImplementedError, 'This method is not implemented due to speed reasons.'
-    end
-
-    # A TokenStream cannot be dumped. Use Tokens.
-    def dump
-      raise NotImplementedError, 'A TokenStream cannot be dumped.'
-    end
-
-    # A TokenStream cannot be optimized. Use Tokens.
-    def optimize
-      raise NotImplementedError, 'A TokenStream cannot be optimized.'
-    end
-
-  end
-
-end
+module CodeRay
+
+  # = Tokens
+  #
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner.
+  #
+  # A token is not a special object, just a two-element Array
+  # consisting of
+  # * the _token_ _text_ (the original source of the token in a String)
+  # * the _token_ _kind_ (a Symbol representing the type of the token)
+  #
+  # A token looks like this:
+  #
+  #   ['# It looks like this', :comment]
+  #   ['3.1415926', :float]
+  #   ['äöü', :error]
+  #
+  # Some scanners also yield some kind of sub-tokens, represented by special
+  # token texts, namely :open and :close.
+  #
+  # The Ruby scanner, for example, splits "a string" into:
+  #
+  #  [
+  #   [:open, :string],
+  #   ['"', :delimiter],
+  #   ['a string', :content],
+  #   ['"', :delimiter],
+  #   [:close, :string]
+  #  ]
+  #
+  # Tokens is also the interface between Scanners and Encoders:
+  # The input is split and saved into a Tokens object. The Encoder
+  # then builds the output from this object.
+  #
+  # Thus, the syntax below becomes clear:
+  #
+  #   CodeRay.scan('price = 2.59', :ruby).html
+  #   # the Tokens object is here -------^
+  #
+  # See how small it is? ;)
+  #
+  # Tokens gives you the power to handle pre-scanned code very easily:
+  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
+  # that you put in your DB.
+  #
+  # Tokens' subclass TokenStream allows streaming to save memory.
+  class Tokens < Array
+
+    class << self
+
+      # Convert the token to a string.
+      #
+      # This format is used by Encoders.Tokens.
+      # It can be reverted using read_token.
+      def write_token text, type
+        if text.is_a? String
+          "#{type}\t#{escape(text)}\n"
+        else
+          ":#{text}\t#{type}\t\n"
+        end
+      end
+
+      # Read a token from the string.
+      #
+      # Inversion of write_token.
+      #
+      # TODO Test this!
+      def read_token token
+        type, text = token.split("\t", 2)
+        if type[0] == ?:
+          [text.to_sym, type[1..-1].to_sym]
+        else
+          [type.to_sym, unescape(text)]
+        end
+      end
+
+      # Escapes a string for use in write_token.
+      def escape text
+        text.gsub(/[\n\\]/, '\\\\\&')
+      end
+
+      # Unescapes a string created by escape.
+      def unescape text
+        text.gsub(/\\[\n\\]/) { |m| m[1,1] }
+      end
+
+    end
+
+    # Whether the object is a TokenStream.
+    #
+    # Returns false.
+    def stream?
+      false
+    end
+
+    # Iterates over all tokens.
+    #
+    # If a filter is given, only tokens of that kind are yielded.
+    def each kind_filter = nil, &block
+      unless kind_filter
+        super(&block)
+      else
+        super() do |text, kind|
+          next unless kind == kind_filter
+          yield text, kind
+        end
+      end
+    end
+
+    # Iterates over all text tokens.
+    # Range tokens like [:open, :string] are left out.
+    #
+    # Example:
+    #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
+    def each_text_token
+      each do |text, kind|
+        next unless text.respond_to? :to_str
+        yield text, kind
+      end
+    end
+
+    # Encode the tokens using encoder.
+    #
+    # encoder can be
+    # * a symbol like :html or :statistic
+    # * an Encoder class
+    # * an Encoder object
+    #
+    # options are passed to the encoder.
+    def encode encoder, options = {}
+      unless encoder.is_a? Encoders::Encoder
+        unless encoder.is_a? Class
+          encoder_class = Encoders[encoder]
+        end
+        encoder = encoder_class.new options
+      end
+      encoder.encode_tokens self, options
+    end
+
+
+    # Turn into a string using Encoders::Text.
+    #
+    # +options+ are passed to the encoder if given.
+    def to_s options = {}
+      encode :text, options
+    end
+
+
+    # Redirects unknown methods to encoder calls.
+    #
+    # For example, if you call +tokens.html+, the HTML encoder
+    # is used to highlight the tokens.
+    def method_missing meth, options = {}
+      Encoders[meth].new(options).encode_tokens self
+    end
+
+    # Returns the tokens compressed by joining consecutive
+    # tokens of the same kind.
+    #
+    # This can not be undone, but should yield the same output
+    # in most Encoders.  It basically makes the output smaller.
+    #
+    # Combined with dump, it saves space at the cost of time.
+    #
+    # If the scanner is written carefully, this is not required -
+    # for example, consecutive //-comment lines could already be
+    # joined in one comment token by the Scanner.
+    def optimize
+      print ' Tokens#optimize: before: %d - ' % size if $DEBUG
+      last_kind = last_text = nil
+      new = self.class.new
+      each do |text, kind|
+        if text.is_a? String
+          if kind == last_kind
+            last_text << text
+          else
+            new << [last_text, last_kind] if last_kind
+            last_text = text
+            last_kind = kind
+          end
+        else
+          new << [last_text, last_kind] if last_kind
+          last_kind = last_text = nil
+          new << [text, kind]
+        end
+      end
+      new << [last_text, last_kind] if last_kind
+      print 'after: %d (%d saved = %2.0f%%)' %
+        [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
+      new
+    end
+
+    # Compact the object itself; see optimize.
+    def optimize!
+      replace optimize
+    end
+
+    # Dumps the object into a String that can be saved
+    # in files or databases.
+    #
+    # The dump is created with Marshal.dump;
+    # In addition, it is gzipped using GZip.gzip.
+    #
+    # The returned String object includes Undumping
+    # so it has an #undump method. See Tokens.load.
+    #
+    # You can configure the level of compression,
+    # but the default value 7 should be what you want
+    # in most cases as it is a good compromise between
+    # speed and compression rate.
+    #
+    # See GZip module.
+    def dump gzip_level = 7
+      require 'coderay/helpers/gzip_simple'
+      dump = Marshal.dump self
+      dump = dump.gzip gzip_level
+      dump.extend Undumping
+    end
+
+    # The total size of the tokens.
+    # Should be equal to the input size before
+    # scanning.
+    def text_size
+      map { |t, k| t }.join.size
+    end
+
+    # Include this module to give an object an #undump
+    # method.
+    #
+    # The string returned by Tokens.dump includes Undumping.
+    module Undumping
+      # Calls Tokens.load with itself.
+      def undump
+        Tokens.load self
+      end
+    end
+
+    # Undump the object by unzipping it using GZip.gunzip,
+    # then loading the result using Marshal.load.
+    #
+    # The result is commonly a Tokens object, but
+    # this is not guaranteed.
+    def Tokens.load dump
+      require 'coderay/helpers/gzip_simple'
+      dump = dump.gunzip
+      @dump = Marshal.load dump
+    end
+
+  end
+
+
+  # = TokenStream
+  #
+  # The TokenStream class is a fake Array without elements.
+  #
+  # It redirects the method << to a block given at creation.
+  #
+  # This allows scanners and Encoders to use streaming (no
+  # tokens are saved, the input is highlighted at the same time
+  # it is scanned) with the same code.
+  #
+  # See CodeRay.encode_stream and CodeRay.scan_stream
+  class TokenStream < Tokens
+
+    # Whether the object is a TokenStream.
+    #
+    # Returns true.
+    def stream?
+      true
+    end
+
+    # The Array is empty, but size counts the tokens given by <<.
+    attr_reader :size
+
+    # Creates a new TokenStream that calls +block+ whenever
+    # its << method is called.
+    #
+    # Example:
+    #
+    #   require 'coderay'
+    #   
+    #   token_stream = CodeRay::TokenStream.new do |kind, text|
+    #     puts 'kind: %s, text size: %d.' % [kind, text.size]
+    #   end
+    #   
+    #   token_stream << [:regexp, '/\d+/']
+    #   #-> kind: regexp, text size: 5.
+    #
+    def initialize &block
+      raise ArgumentError, 'Block expected for streaming.' unless block
+      @callback = block
+      @size = 0
+    end
+
+    # Calls +block+ with +token+ and increments size.
+    #
+    # Returns self.
+    def << token
+      @callback.call token
+      @size += 1
+      self
+    end
+
+    # This method is not implemented due to speed reasons. Use Tokens.
+    def text_size
+      raise NotImplementedError, 'This method is not implemented due to speed reasons.'
+    end
+
+    # A TokenStream cannot be dumped. Use Tokens.
+    def dump
+      raise NotImplementedError, 'A TokenStream cannot be dumped.'
+    end
+
+    # A TokenStream cannot be optimized. Use Tokens.
+    def optimize
+      raise NotImplementedError, 'A TokenStream cannot be optimized.'
+    end
+
+  end
+
+end
author	murphy <murphy@rubychan.de>	2006-07-11 05:40:21 +0000
committer	murphy <murphy@rubychan.de>	2006-07-11 05:40:21 +0000
commit	7bb2aef0553091a10c197e302475c9f14de8a860 (patch)
tree	72ea9444276fe97dc99ae82aa9e46070ec7ba9ea /lib/coderay/tokens.rb
parent	26a8e5a0388199ac686db28d631b05a5b5aa02e1 (diff)
download	coderay-7bb2aef0553091a10c197e302475c9f14de8a860.tar.gz