summaryrefslogtreecommitdiff
path: root/lib/coderay
diff options
context:
space:
mode:
authormurphy <murphy@rubychan.de>2009-06-13 01:24:56 +0000
committermurphy <murphy@rubychan.de>2009-06-13 01:24:56 +0000
commit6287a02ff06e5f3af73e031117ab3876f1db348a (patch)
treec2eac7073e6bcf84054ee20601f4f824c31ce751 /lib/coderay
parent052399c510acf6bb5ae374b7270acbdf9b861b71 (diff)
downloadcoderay-6287a02ff06e5f3af73e031117ab3876f1db348a.tar.gz
Python Scanner (issue #41) updated:
* class and method definitions are highlighted * improved keyword argument recognition (less false positives) * backticks are operators now * from/import support still missing
Diffstat (limited to 'lib/coderay')
-rw-r--r--lib/coderay/scanners/python.rb123
1 files changed, 71 insertions, 52 deletions
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index f72a29e..47fba08 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -70,7 +70,7 @@ module Scanners
[,;:()\[\]{}] | # simple delimiters
\/\/=? | \*\*=? | # special math
[-+*\/%&|^]=? | # ordinary math and binary logic
- ~ | # binary complement
+ [~`] | # binary complement and inspection
<<=? | >>=? | [<>=]=? | != # comparison and assignment
/x
@@ -82,10 +82,16 @@ module Scanners
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
end
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(%w(def), :def_expected).
+ # add(%w(import from), :include_expected).
+ add(%w(class), :class_expected)
+
def scan_tokens tokens, options
state = :initial
string_delimiter = nil
+ string_raw = false
import_clause = class_name_follows = last_token_dot = false
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
@@ -94,19 +100,41 @@ module Scanners
kind = nil
match = nil
- case state
-
- when :initial
-
- if match = scan(/ [ \t]+ | \\?\n /x)
- tokens << [match, :space]
- next
-
- elsif match = scan(/ \# [^\n]* /mx)
- tokens << [match, :comment]
+ if state == :string
+ if scan(STRING_DELIMITER_REGEXP[string_delimiter])
+ tokens << [matched, :delimiter]
+ tokens << [:close, :string]
+ state = :initial
next
+ elsif string_delimiter.size == 3 && scan(/\n/)
+ kind = :content
+ elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
+ kind = :content
+ elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
+ kind = :char
+ elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
+ kind = :char
+ elsif scan(/ \\ . /x)
+ kind = :content
+ elsif scan(/ \\ | $ /x)
+ tokens << [:close, :string]
+ kind = :error
+ state = :initial
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
+ end
+
+ elsif match = scan(/ [ \t]+ | \\?\n /x)
+ tokens << [match, :space]
+ next
+
+ elsif match = scan(/ \# [^\n]* /mx)
+ tokens << [match, :comment]
+ next
+
+ elsif state == :initial
- elsif scan(/#{OPERATOR}/o)
+ if scan(/#{OPERATOR}/o)
kind = :operator
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
@@ -122,14 +150,20 @@ module Scanners
state = :string
kind = :delimiter
- elsif match = (unicode && scan(/[[:alpha:]_]\w*/ux)) ||
- scan(/[[:alpha:]_]\w*/x)
+ # TODO: backticks
+
+ elsif match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
kind = IDENT_KIND[match]
- # TODO: handle class, def, from, import
+ # TODO: from, import
# TODO: keyword arguments
kind = :ident if last_token_dot
- kind = check(/\(/) ? :ident : :keyword if kind == :old_keyword
- kind = :ident if kind == :predefined && check(/=/)
+ if kind == :old_keyword
+ kind = check(/\(/) ? :ident : :keyword
+ elsif kind == :predefined && check(/ *=/)
+ kind = :ident
+ elsif kind == :keyword
+ state = DEF_NEW_STATE[match]
+ end
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
kind = :decorator
@@ -162,51 +196,36 @@ module Scanners
kind = :error
end
-
- when :string
- if scan(STRING_DELIMITER_REGEXP[string_delimiter])
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif string_delimiter.size == 3 && scan(/\n/)
- kind = :content
- elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
- kind = :content
- elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
- kind = :char
- elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
- kind = :char
- elsif scan(/ \\ . /x)
- kind = :content
- elsif scan(/ \\ | $ /x)
- tokens << [:close, :string]
- kind = :error
- state = :initial
+
+ elsif state == :def_expected
+ state = :initial
+ if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ kind = :method
else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
+ next
end
- when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
- state = :initial
-
- elsif match = scan(/\s+/)
- kind = :space
- state = :initial if match.index ?\n
-
+ elsif state == :class_expected
+ state = :initial
+ if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ kind = :class
else
- getch
- kind = :error
+ next
+ end
+ elsif state == :include_expected
+ state = :initial
+ if match = scan(unicode ? /[[:alpha:]_]\w*/ux : /[[:alpha:]_]\w*/x)
+ kind = :include
+ else
+ next
end
else
raise_inspect 'Unknown state', tokens, state
-
+
end
-
+
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %