Cleanup, documentation and enhancements to the Filters:

* TokenKindFilter handles groups (closes #223). * Added :docstring to the default KINDS_NOT_LOC list. * Also fixed a problem with the :tokens option in Scanners.
author: murphy <murphy@rubychan.de> 2010-05-18 06:19:53 +0000
committer: murphy <murphy@rubychan.de> 2010-05-18 06:19:53 +0000
commit: 9bab8e9248c538d92561686734d5dfae3a1bb42f (patch)
tree: 92cfb7b0dcb67faf477495c9823df1d6c7a5a1eb /lib/coderay
parent: f9da00a9da5fc15d08cd455884180d49417c5fe4 (diff)
download: coderay-9bab8e9248c538d92561686734d5dfae3a1bb42f.tar.gz
5 files changed, 114 insertions, 33 deletions
diff --git a/lib/coderay/encoders/comment_filter.rb b/lib/coderay/encoders/comment_filter.rb
index 0a6a2e8..33e2dfb 100644
--- a/lib/coderay/encoders/comment_filter.rb
+++ b/lib/coderay/encoders/comment_filter.rb
@@ -9,7 +9,7 @@ module Encoders
   # Alias: +remove_comments+
   # 
   # Usage:
-  #  CodeRay.scan('print # foo', :ruby).remove_comments.text
+  #  CodeRay.scan('print # foo', :ruby).comment_filter.text
   #  #-> "print "
   # 
   # See also: TokenKindFilter, LinesOfCode
@@ -58,22 +58,28 @@ docstring when the file is imported
 '''
 
 class Myclass():
-    """The class's docstring"""
+    """
+    The class's docstring
+    """
 
     def mymethod(self):
-        "The method's docstring"
+        '''The method's docstring'''
 
 def myfunction():
-    "The function's docstring"
+    """The function's docstring"""
     PYTHON
     assert_equal <<-PYTHON_FILTERED.chomp, tokens.comment_filter.text
 
+
 class Myclass():
     
+
     def mymethod(self):
         
+
 def myfunction():
     
+
 PYTHON_FILTERED
   end
   
diff --git a/lib/coderay/encoders/filter.rb b/lib/coderay/encoders/filter.rb
index 13621ff..e06fba7 100644
--- a/lib/coderay/encoders/filter.rb
+++ b/lib/coderay/encoders/filter.rb
@@ -3,7 +3,17 @@ module CodeRay
 module Encoders
   
   # A Filter encoder has another Tokens instance as output.
-  # It is used to select and delete tokens from the stream.
+  # It can be subclassed to select, remove, or modify tokens in the stream.
+  # 
+  # Subclasses of Filter are called "Filters" and can be chained.
+  # 
+  # == Options
+  # 
+  # === :tokens
+  # 
+  # The Tokens object which will receive the output.
+  # 
+  # Default: Tokens.new
   # 
   # See also: TokenKindFilter
   class Filter < Encoder
@@ -12,37 +22,29 @@ module Encoders
     
   protected
     def setup options
-      @out = Tokens.new
-    end
-    
-    def include_text_token? text, kind
-      true
-    end
-    
-    def include_block_token? action, kind
-      true
+      @out = options[:tokens] || Tokens.new
     end
     
   public
     
-    def text_token text, kind
-      @out.text_token text, kind if include_text_token? text, kind
+    def text_token text, kind  # :nodoc:
+      @out.text_token text, kind
     end
     
-    def begin_group kind
-      @out.begin_group kind if include_block_token? :begin_group, kind
+    def begin_group kind  # :nodoc:
+      @out.begin_group kind
     end
     
-    def end_group kind
-      @out.end_group kind if include_block_token? :end_group, kind
+    def begin_line kind  # :nodoc:
+      @out.begin_line kind
     end
     
-    def begin_line kind
-      @out.begin_line kind if include_block_token? :begin_line, kind
+    def end_group kind  # :nodoc:
+      @out.end_group kind
     end
     
-    def end_line kind
-      @out.end_line kind if include_block_token? :end_line, kind
+    def end_line kind  # :nodoc:
+      @out.end_line kind
     end
     
   end
@@ -85,6 +87,9 @@ class FilterTest < Test::Unit::TestCase
       tokens.begin_group :index
       tokens.text_token i.to_s, :content
       tokens.end_group :index
+      tokens.begin_line :index
+      tokens.text_token i.to_s, :content
+      tokens.end_line :index
     end
     assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens)
     assert_equal tokens, tokens.filter
diff --git a/lib/coderay/encoders/lines_of_code.rb b/lib/coderay/encoders/lines_of_code.rb
index 6b36aef..8ba82fa 100644
--- a/lib/coderay/encoders/lines_of_code.rb
+++ b/lib/coderay/encoders/lines_of_code.rb
@@ -21,6 +21,8 @@ module Encoders
     
     NON_EMPTY_LINE = /^\s*\S.*$/
     
+  protected
+    
     def compile tokens, options
       if scanner = tokens.scanner
         kinds_not_loc = scanner.class::KINDS_NOT_LOC
diff --git a/lib/coderay/encoders/token_kind_filter.rb b/lib/coderay/encoders/token_kind_filter.rb
index 431794d..e558594 100644
--- a/lib/coderay/encoders/token_kind_filter.rb
+++ b/lib/coderay/encoders/token_kind_filter.rb
@@ -24,17 +24,18 @@ module Encoders
   # 
   # See also: CommentFilter
   class TokenKindFilter < Filter
-
+    
     register_for :token_kind_filter
-
+    
     DEFAULT_OPTIONS = {
       :exclude => [],
       :include => :all
     }
-
+    
   protected
     def setup options
       super
+      @group_excluded = false
       @exclude = options[:exclude]
       @exclude = Array(@exclude) unless @exclude == :all
       @include = options[:include]
@@ -42,12 +43,70 @@ module Encoders
     end
     
     def include_text_token? text, kind
+      include_group? kind
+    end
+    
+    def include_group? kind
        (@include == :all || @include.include?(kind)) &&
       !(@exclude == :all || @exclude.include?(kind))
     end
     
+  public
+    
+    # Add the token to the output stream if +kind+ matches the conditions.
+    def text_token text, kind
+      super if !@group_excluded && include_text_token?(text, kind)
+    end
+    
+    # Add the token group to the output stream if +kind+ matches the
+    # conditions.
+    # 
+    # If it does not, all tokens inside the group are excluded from the
+    # stream, even if their kinds match.
+    def begin_group kind
+      if @group_excluded
+        @group_excluded += 1
+      elsif include_group? kind
+        super
+      else
+        @group_excluded = 1
+      end
+    end
+    
+    # See +begin_group+.
+    def begin_line kind
+      if @group_excluded
+        @group_excluded += 1
+      elsif include_group? kind
+        super
+      else
+        @group_excluded = 1
+      end
+    end
+    
+    # Take care of re-enabling the delegation of tokens to the output stream
+    # if an excluded group has ended.
+    def end_group kind
+      if @group_excluded
+        @group_excluded -= 1
+        @group_excluded = false if @group_excluded.zero?
+      else
+        super
+      end
+    end
+    
+    # See +end_group+.
+    def end_line kind
+      if @group_excluded
+        @group_excluded -= 1
+        @group_excluded = false if @group_excluded.zero?
+      else
+        super
+      end
+    end
+    
   end
-
+  
 end
 end
 
@@ -92,11 +151,18 @@ class TokenKindFilterTest < Test::Unit::TestCase
       tokens.begin_group :index
       tokens.text_token i.to_s, :content
       tokens.end_group :index
+      tokens.begin_group :naught if i == 5
+      tokens.end_group :naught if i == 7
+      tokens.begin_line :blubb
+      tokens.text_token i.to_s, :content
+      tokens.end_line :blubb
     end
-    assert_equal 20, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).count
-    assert_equal 20, tokens.token_kind_filter(:include => :blubb).count
-    assert_equal 30, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).count
-    assert_equal 30, tokens.token_kind_filter(:exclude => :index).count
+    assert_equal 16, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => :blubb).count
+    assert_equal 16, tokens.token_kind_filter(:include => :blubb).count
+    assert_equal 24, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :include => [:blubb, :content]).count
+    assert_equal 24, tokens.token_kind_filter(:include => [:blubb, :content]).count
+    assert_equal 32, CodeRay::Encoders::TokenKindFilter.new.encode_tokens(tokens, :exclude => :index).count
+    assert_equal 32, tokens.token_kind_filter(:exclude => :index).count
   end
   
 end
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 22f1c67..7d154da 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -57,7 +57,7 @@ module CodeRay
       # Define @default_options for subclasses.
       DEFAULT_OPTIONS = { }
       
-      KINDS_NOT_LOC = [:comment, :doctype]
+      KINDS_NOT_LOC = [:comment, :doctype, :docstring]
 
       class << self
 
@@ -149,6 +149,8 @@ module CodeRay
       # Scans the code and returns all tokens in a Tokens object.
       def tokenize new_string=nil, options = {}
         options = @options.merge(options)
+        @tokens = options[:tokens] || @tokens || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
         self.string = new_string if new_string
         reset unless new_string
         scan_tokens @tokens, options
author	murphy <murphy@rubychan.de>	2010-05-18 06:19:53 +0000
committer	murphy <murphy@rubychan.de>	2010-05-18 06:19:53 +0000
commit	9bab8e9248c538d92561686734d5dfae3a1bb42f (patch)
tree	92cfb7b0dcb67faf477495c9823df1d6c7a5a1eb /lib/coderay
parent	f9da00a9da5fc15d08cd455884180d49417c5fe4 (diff)
download	coderay-9bab8e9248c538d92561686734d5dfae3a1bb42f.tar.gz