summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2009-10-19 17:25:57 +0000
committer: murphy <murphy@rubychan.de> 2009-10-19 17:25:57 +0000
commit: 094616e18e4a0f441fe6f88c65dc1b86be5668d2 (patch)
tree: f785565c18598425c11ef52a39616531041f3db2 /lib/coderay/scanners
parent: 98cd8c95c53d7865db469f742c541f274057770a (diff)
download: coderay-094616e18e4a0f441fe6f88c65dc1b86be5668d2.tar.gz
Updated Python scanner (#41)
* Unicode support (kind of)
* `[from ...] import ... as` construct highlighted as :include
* added a test case for import statements
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- lib/coderay/scanners/python.rb | 49
1 files changed, 41 insertions, 8 deletions
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index 47fba08..b0aa82a 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -61,6 +61,7 @@ module Scanners
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
add(PREDEFINED_EXCEPTIONS, :exception)
+ NAME = / [^\W\d] \w* /x
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
@@ -84,9 +85,15 @@ module Scanners
DEF_NEW_STATE = WordList.new(:initial).
add(%w(def), :def_expected).
- # add(%w(import from), :include_expected).
+ add(%w(import from), :include_expected).
add(%w(class), :class_expected)
+ DESCRIPTOR = /
+ #{NAME}
+ (?: \. #{NAME} )*
+ | \*
+ /x
+
def scan_tokens tokens, options
state = :initial
@@ -94,6 +101,7 @@ module Scanners
string_raw = false
import_clause = class_name_follows = last_token_dot = false
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
+ from_import_state = []
until eos?
@@ -124,8 +132,13 @@ module Scanners
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
end
- elsif match = scan(/ [ \t]+ | \\?\n /x)
+ elsif match = scan(/ [ \t]+ | \\\n /x)
+ tokens << [match, :space]
+ next
+
+ elsif match = scan(/\n/)
tokens << [match, :space]
+ state = :initial if state == :include_expected
next
elsif match = scan(/ \# [^\n]* /mx)
@@ -152,9 +165,8 @@ module Scanners
# TODO: backticks
- elsif match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
kind = IDENT_KIND[match]
- # TODO: from, import
# TODO: keyword arguments
kind = :ident if last_token_dot
if kind == :old_keyword
@@ -163,6 +175,7 @@ module Scanners
kind = :ident
elsif kind == :keyword
state = DEF_NEW_STATE[match]
+ from_import_state << match.to_sym if state == :include_expected
end
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
@@ -199,7 +212,7 @@ module Scanners
elsif state == :def_expected
state = :initial
- if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
kind = :method
else
next
@@ -207,17 +220,37 @@ module Scanners
elsif state == :class_expected
state = :initial
- if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
kind = :class
else
next
end
elsif state == :include_expected
- state = :initial
- if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
kind = :include
+ if match == 'as'
+ kind = :keyword
+ from_import_state << :as
+ elsif from_import_state.first == :from && match == 'import'
+ kind = :keyword
+ from_import_state << :import
+ elsif from_import_state.last == :as
+ # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
+ kind = :ident
+ from_import_state.pop
+ elsif IDENT_KIND[match] == :keyword
+ unscan
+ match = nil
+ state = :initial
+ next
+ end
+ elsif match = scan(/,/)
+ from_import_state.pop if from_import_state.last == :as
+ kind = :operator
else
+ from_import_state = []
+ state = :initial
next
end