summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanner.rb
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
committer: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
commit: 13c1a74215c2af9150f6e61adbfc6c3e48689770 (patch)
tree: d9906e0dccf6d9004185f571850048c35918f2bf /lib/coderay/scanner.rb
parent: ebcf3ab3e0814e4a0187ab23f3209ed162576ef8 (diff)
download: coderay-13c1a74215c2af9150f6e61adbfc6c3e48689770.tar.gz
Experimental Unicode support for Scanners in Ruby 1.9.
* Python scanner uses it for idents.
* Scanner#column method needed to be fixed for multibyte characters because StringScanner#pos still works on bytes.
Diffstat (limited to 'lib/coderay/scanner.rb')
-rw-r--r-- lib/coderay/scanner.rb 15
1 files changed, 14 insertions, 1 deletions
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index fd5625e..839b9fc 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -65,7 +65,14 @@ module CodeRay
def normify code
code = code.to_s
- code.force_encoding 'binary' if code.respond_to? :force_encoding
+ if code.respond_to? :force_encoding
+ begin
+ code.force_encoding 'utf-8'
+ code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
+ rescue ArgumentError
+ code.force_encoding 'binary'
+ end
+ end
code.to_unix
end
@@ -181,6 +188,11 @@ module CodeRay
def column pos = self.pos
return 0 if pos <= 0
+ string = string()
+ if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
+ @bin_string ||= string.dup.force_encoding(:binary)
+ string = @bin_string
+ end
pos - (string.rindex(?\n, pos) || 0)
end
@@ -207,6 +219,7 @@ module CodeRay
def reset_instance
@tokens.clear unless @options[:keep_tokens]
@cached_tokens = nil
+ @bin_string = nil if defined? @bin_string
end
# Scanner error with additional status information