Issue #227: Inline Diff Highlighting and improved intelligent diff

highlighting for multi-line tokens. These are among the most complex features of CodeRay now, and quite unique, I think. Of course, all of this is highly experimental, and not ready for production use.
author: murphy <murphy@rubychan.de> 2010-06-01 21:53:10 +0000
committer: murphy <murphy@rubychan.de> 2010-06-01 21:53:10 +0000
commit: c30346cfa5d85d3c2f8903962322a07d695493d9 (patch)
tree: c7a137252eb825d0dc5b45dba2e89166116d3d74
parent: 39eb4c9f60e296f9f6c51784ef7e0a234966755d (diff)
download: coderay-c30346cfa5d85d3c2f8903962322a07d695493d9.tar.gz
2 files changed, 94 insertions, 9 deletions
diff --git a/Changes.textile b/Changes.textile
index 44daa5f..a8af808 100644
--- a/Changes.textile
+++ b/Changes.textile
@@ -132,12 +132,33 @@ h3. @Scanners::Diff@
 * *NEW*: Highlighting of code based on file names.
   See ticket "#52":http://redmine.rubychan.de/issues/52.
   
+  Use the @:highlight_code@ option to turn this feature off. It's enabled
+  by default.
+  
   This is a very original feature. It enables multi-language highlighting for
   diff files, which is especially helpful for CodeRay development itself. The
   updated version of the scanner test suite generated .debug.diff.html files
   using this.
   
-  Note: This is still buggy for multi-line tokens.
+  Note: This is still experimental. Tokens spanning more than one line
+  may get highlighted incorrectly. CodeRay tries to keep scanner states
+  between the lines and changes, but the quality of the results depends on
+  the scanner.
+* *NEW*: Inline change highlighting, as suggested by Eric Thomas.
+  See ticket "#227":http://redmine.rubychan.de/issues/227 for details.
+  
+  Use the @:inline_diff@ option to turn this feature off. It's enabled by
+  default.
+  
+  For single-line changes (that is, a single deleted line followed by a single
+  inserted line), this feature surrounds the changed parts with an
+  @:eyecatcher@ group which appears in a more saturated background color.
+  The implementation is quite complex, and highly experimental. The problem
+  with multi-layer tokenizing is that the tokens have to be split into parts.
+  If the inline change starts, say, in the middle of a string, then additional
+  @:end_group@ and @:begin_group@ tokens must be inserted to keep the group
+  nesting intact. The extended @Scanner#tokenize@ method and the new
+  @Tokens#split_into_parts@ method take care of this.
 * *NEW*: Highlight the file name in the change headers as @:filename@.
 * *CHANGED*: Highlight unknown lines as @:comment@ instead of @:head@.
 
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index 050ffb1..b0192a9 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -9,6 +9,11 @@ module Scanners
     register_for :diff
     title 'diff output'
     
+    DEFAULT_OPTIONS = {
+      :highlight_code => true,
+      :inline_diff => true,
+    }
+    
   protected
     
     require 'coderay/helpers/file_type'
@@ -17,12 +22,17 @@ module Scanners
       
       line_kind = nil
       state = :initial
-      # TODO: Cache scanners
-      content_lang = nil
+      deleted_lines = 0
+      scanners = Hash.new do |h, lang|
+        h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true
+      end
+      content_scanner = scanners[:plain]
+      content_scanner_entry_state = nil
       
       until eos?
         
         if match = scan(/\n/)
+          deleted_lines = 0 unless line_kind == :delete
           if line_kind
             encoder.end_line line_kind
             line_kind = nil
@@ -39,7 +49,10 @@ module Scanners
             encoder.text_token match, :head
             if match = scan(/.*?(?=$|[\t\n\x00]|  \(revision)/)
               encoder.text_token match, :filename
-              content_lang = FileType.fetch match, :plaintext
+              if options[:highlight_code]
+                content_scanner = scanners[FileType.fetch(match, :plaintext)]
+                content_scanner_entry_state = nil
+              end
             end
             next unless match = scan(/.+/)
             encoder.text_token match, :plain
@@ -60,6 +73,8 @@ module Scanners
             next unless match = scan(/.+/)
             encoder.text_token match, :plain
           elsif match = scan(/@@(?>[^@\n]*)@@/)
+            content_scanner.instance_variable_set(:@state, :initial) unless match?(/\n\+/)
+            content_scanner_entry_state = nil
             if check(/\n|$/)
               encoder.begin_line line_kind = :change
             else
@@ -70,22 +85,71 @@ module Scanners
             encoder.text_token match[-2,2], :change
             encoder.end_group :change unless line_kind
             next unless match = scan(/.+/)
-            CodeRay.scan match, content_lang, :tokens => encoder, :keep_tokens => true
+            if options[:highlight_code]
+              content_scanner.tokenize match, :tokens => encoder
+            else
+              encoder.text_token match, :plain
+            end
             next
           elsif match = scan(/\+/)
             encoder.begin_line line_kind = :insert
             encoder.text_token match, :insert
             next unless match = scan(/.+/)
-            CodeRay.scan match, content_lang, :tokens => encoder, :keep_tokens => true
+            if options[:highlight_code]
+              content_scanner.tokenize match, :tokens => encoder
+            else
+              encoder.text_token match, :plain
+            end
             next
           elsif match = scan(/-/)
+            deleted_lines += 1
             encoder.begin_line line_kind = :delete
             encoder.text_token match, :delete
-            next unless match = scan(/.+/)
-            CodeRay.scan match, content_lang, :tokens => encoder, :keep_tokens => true
+            if options[:inline_diff] && deleted_lines == 1 && check(/(?>.*)\n\+(?>.*)$(?!\n\+)/)
+              if content_scanner.instance_variable_defined?(:@state)
+                content_scanner_entry_state = content_scanner.instance_variable_get(:@state)
+              end
+              skip(/(.*)(.*?)(.*)\n\+\1(.*)\3$/)
+              pre, deleted, post = content_scanner.tokenize [self[1], self[2], self[3]], :tokens => Tokens.new
+              encoder.tokens pre
+              encoder.begin_group :eyecatcher
+              encoder.tokens deleted
+              encoder.end_group :eyecatcher
+              encoder.tokens post
+              encoder.end_line line_kind
+              encoder.text_token "\n", :space
+              encoder.begin_line line_kind = :insert
+              encoder.text_token '+', :insert
+              content_scanner.instance_variable_set(:@state, content_scanner_entry_state || :initial)
+              pre, inserted, post = content_scanner.tokenize [self[1], self[4], self[3]], :tokens => Tokens.new
+              encoder.tokens pre
+              encoder.begin_group :eyecatcher
+              encoder.tokens inserted
+              encoder.end_group :eyecatcher
+              encoder.tokens post
+            elsif match = scan(/.*/)
+              if options[:highlight_code]
+                if deleted_lines == 1 && content_scanner.instance_variable_defined?(:@state)
+                  content_scanner_entry_state = content_scanner.instance_variable_get(:@state)
+                end
+                content_scanner.tokenize match, :tokens => encoder unless match.empty?
+                if !match?(/\n-/)
+                  if match?(/\n\+/)
+                    content_scanner.instance_variable_set(:@state, content_scanner_entry_state || :initial)
+                  end
+                  content_scanner_entry_state = nil
+                end
+              else
+                encoder.text_token match, :plain
+              end
+            end
             next
           elsif match = scan(/ .*/)
-            CodeRay.scan match, content_lang, :tokens => encoder, :keep_tokens => true
+            if options[:highlight_code]
+              content_scanner.tokenize match, :tokens => encoder
+            else
+              encoder.text_token match, :plain
+            end
             next
           elsif match = scan(/.+/)
             encoder.begin_line line_kind = :comment
author	murphy <murphy@rubychan.de>	2010-06-01 21:53:10 +0000
committer	murphy <murphy@rubychan.de>	2010-06-01 21:53:10 +0000
commit	c30346cfa5d85d3c2f8903962322a07d695493d9 (patch)
tree	c7a137252eb825d0dc5b45dba2e89166116d3d74
parent	39eb4c9f60e296f9f6c51784ef7e0a234966755d (diff)
download	coderay-c30346cfa5d85d3c2f8903962322a07d695493d9.tar.gz