Experimental Unicode support for Scanners in Ruby 1.9.

* Python scanner uses it for identifiers.
* Scanner#column method needed to be fixed for multibyte characters because StringScanner#pos still works on bytes.
author: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
committer: murphy <murphy@rubychan.de> 2009-04-20 21:08:33 +0000
commit: 13c1a74215c2af9150f6e61adbfc6c3e48689770 (patch)
tree: d9906e0dccf6d9004185f571850048c35918f2bf /lib/coderay/scanners
parent: ebcf3ab3e0814e4a0187ab23f3209ed162576ef8 (diff)
download: coderay-13c1a74215c2af9150f6e61adbfc6c3e48689770.tar.gz
1 files changed, 3 insertions, 1 deletions
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index 05fe8d6..685232b 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -75,6 +75,7 @@ module Scanners
       state = :initial
       string_delimiter = nil
       import_clause = class_name_follows = last_token_dot = false
+      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
       
       until eos?
         
@@ -109,7 +110,8 @@ module Scanners
             state = :string
             kind = :delimiter
           
-          elsif match = scan(/[[:alpha:]_][[:alnum:]_]*/ux)
+          elsif match = (unicode && scan(/[[:alpha:]_]\w*/ux)) ||
+                                    scan(/[[:alpha:]_]\w*/x)
             kind = IDENT_KIND[match]
             # TODO: handle class, def, from, import
             # TODO: handle print, exec used as functions in Python 3 code
author	murphy <murphy@rubychan.de>	2009-04-20 21:08:33 +0000
committer	murphy <murphy@rubychan.de>	2009-04-20 21:08:33 +0000
commit	13c1a74215c2af9150f6e61adbfc6c3e48689770 (patch)
tree	d9906e0dccf6d9004185f571850048c35918f2bf /lib/coderay/scanners
parent	ebcf3ab3e0814e4a0187ab23f3209ed162576ef8 (diff)
download	coderay-13c1a74215c2af9150f6e61adbfc6c3e48689770.tar.gz