Experimental Unicode support for Scanners in Ruby 1.9.

* Python scanner uses it for idents.
* Scanner#column method needed to be fixed for multibyte characters because StringScanner#pos still works on bytes.
author: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
committer: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
commit: 13c1a74215c2af9150f6e61adbfc6c3e48689770 (patch)
tree: d9906e0dccf6d9004185f571850048c35918f2bf /lib/coderay/scanner.rb
parent: ebcf3ab3e0814e4a0187ab23f3209ed162576ef8 (diff)
download: coderay-13c1a74215c2af9150f6e61adbfc6c3e48689770.tar.gz
1 files changed, 14 insertions, 1 deletions
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index fd5625e..839b9fc 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -65,7 +65,14 @@ module CodeRay
 
         def normify code
           code = code.to_s
-          code.force_encoding 'binary' if code.respond_to? :force_encoding
+          if code.respond_to? :force_encoding
+            begin
+              code.force_encoding 'utf-8'
+              code[/\z/]  # raises an ArgumentError when code contains a non-UTF-8 char
+            rescue ArgumentError
+              code.force_encoding 'binary'
+            end
+          end
           code.to_unix
         end
         
@@ -181,6 +188,11 @@ module CodeRay
       
       def column pos = self.pos
         return 0 if pos <= 0
+        string = string()
+        if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
+          @bin_string ||= string.dup.force_encoding(:binary)
+          string = @bin_string
+        end
         pos - (string.rindex(?\n, pos) || 0)
       end
 
@@ -207,6 +219,7 @@ module CodeRay
       def reset_instance
         @tokens.clear unless @options[:keep_tokens]
         @cached_tokens = nil
+        @bin_string = nil if defined? @bin_string
       end
 
       # Scanner error with additional status information
author	murphy <murphy@rubychan.de>	2009-04-20 21:08:33 +0000
committer	murphy <murphy@rubychan.de>	2009-04-20 21:08:33 +0000
commit	13c1a74215c2af9150f6e61adbfc6c3e48689770 (patch)
tree	d9906e0dccf6d9004185f571850048c35918f2bf /lib/coderay/scanner.rb
parent	ebcf3ab3e0814e4a0187ab23f3209ed162576ef8 (diff)
download	coderay-13c1a74215c2af9150f6e61adbfc6c3e48689770.tar.gz