From 13c1a74215c2af9150f6e61adbfc6c3e48689770 Mon Sep 17 00:00:00 2001
From: murphy
Date: Mon, 20 Apr 2009 21:08:33 +0000
Subject: Experimental Unicode support for Scanners in Ruby 1.9.

* Python scanner uses it for idents.
* Scanner#column method needed to be fixed for multibyte characters because StringScanner#pos still works on bytes.
---
 lib/coderay/scanners/python.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib/coderay/scanners/python.rb')

diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index 05fe8d6..685232b 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -75,6 +75,7 @@ module Scanners
       state = :initial
       string_delimiter = nil
       import_clause = class_name_follows = last_token_dot = false
+      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
 
       until eos?
 
@@ -109,7 +110,8 @@ module Scanners
             state = :string
             kind = :delimiter
 
-          elsif match = scan(/[[:alpha:]_][[:alnum:]_]*/ux)
+          elsif match = (unicode && scan(/[[:alpha:]_]\w*/ux)) ||
+            scan(/[[:alpha:]_]\w*/x)
             kind = IDENT_KIND[match]
             # TODO: handle class, def, from, import
             # TODO: handle print, exec used as functions in Python 3 code
--
cgit v1.2.1