summary | refs | log | tree | commit | diff
path: root/lib/coderay
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2009-04-22 02:40:04 +0000
committer: murphy <murphy@rubychan.de> 2009-04-22 02:40:04 +0000
commit: 59b31ae8596f9606217b09d4e3f00dcf5aab8475 (patch)
tree: c1eaef9b3c98f48c449cd84a59c751528f7c45b5 /lib/coderay
parent: a40476dc4a91737182f78fe939e1d91bd644ea99 (diff)
download: coderay-59b31ae8596f9606217b09d4e3f00dcf5aab8475.tar.gz
Improved Python scanner (issue #41).
* fixed numeric literals
* better Python 3 support
* bugfixes, optimizations
* added two more test files
Diffstat (limited to 'lib/coderay')
-rw-r--r-- lib/coderay/scanners/python.rb 64
-rw-r--r-- lib/coderay/styles/cycnus.rb 2
-rwxr-xr-x lib/coderay/token_classes.rb 3
3 files changed, 50 insertions, 19 deletions
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index 685232b..6e86b88 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -15,10 +15,13 @@ module Scanners
'del', 'elif', 'else', 'except', 'finally', 'for',
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
- 'exec', 'print', # gone in Python 3
'nonlocal', # new in Python 3
]
+ OLD_KEYWORDS = [
+ 'exec', 'print', # gone in Python 3
+ ]
+
PREDEFINED_METHODS_AND_TYPES = %w[
__import__ abs all any apply basestring bin bool buffer
bytearray bytes callable chr classmethod cmp coerce compile
@@ -53,12 +56,13 @@ module Scanners
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
+ add(OLD_KEYWORDS, :old_keyword).
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
add(PREDEFINED_EXCEPTIONS, :exception)
- ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+ ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
OPERATOR = /
\.\.\. | # ellipsis
@@ -66,10 +70,18 @@ module Scanners
[,;:()\[\]{}] | # simple delimiters
\/\/=? | \*\*=? | # special math
[-+*\/%&|^]=? | # ordinary math and binary logic
- [~@] | # whatever
+ ~ | # binary complement
<<=? | >>=? | [<>=]=? | != # comparison and assignment
/x
+ STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
+ h[delimiter] = Regexp.union delimiter
+ end
+
+ STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
+ h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
+ end
+
def scan_tokens tokens, options
state = :initial
@@ -94,10 +106,10 @@ module Scanners
tokens << [match, :comment]
next
- elsif scan(/#{OPERATOR}/ox)
+ elsif scan(/#{OPERATOR}/o)
kind = :operator
- elsif match = scan(/(?i:(u?r?))?("""|"|'''|')/)
+ elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
tokens << [:open, :string]
string_delimiter = self[2]
string_raw = false
@@ -114,22 +126,35 @@ module Scanners
scan(/[[:alpha:]_]\w*/x)
kind = IDENT_KIND[match]
# TODO: handle class, def, from, import
- # TODO: handle print, exec used as functions in Python 3 code
+ # TODO: keyword arguments
kind = :ident if last_token_dot
+ kind = check(/\(/) ? :ident : :keyword if kind == :old_keyword
+
+ elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
+ kind = :decorator
- elsif scan(/0[xX][0-9A-Fa-f]+/)
+ elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
kind = :hex
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+ elsif scan(/0[bB][01]+[lL]?/)
+ kind = :bin
+
+ elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+ kind = :float
+ if scan(/[jJ]/)
+ match << matched
+ kind = :imaginary
+ end
+
+ elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
kind = :oct
- # TODO: Complex numbers
- elsif scan(/(?:\d+)(?![.eEfF])/)
+ elsif match = scan(/\d+([lL])?/)
kind = :integer
-
- # TODO: Floats
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
- kind = :float
+ if self[1] == nil && scan(/[jJ]/)
+ match << matched
+ kind = :imaginary
+ end
else
getch
@@ -138,17 +163,18 @@ module Scanners
end
when :string
- # TODO: cache Regexps
- if scan(Regexp.union(string_delimiter))
+ if scan(STRING_DELIMITER_REGEXP[string_delimiter])
tokens << [matched, :delimiter]
tokens << [:close, :string]
state = :initial
next
elsif string_delimiter.size == 3 && scan(/\n/)
kind = :content
- elsif scan(/ [^\\\n]+? (?= \\ | $ | #{Regexp.escape(string_delimiter)} ) /x)
+ elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
kind = :content
- elsif !string_raw && scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
+ kind = :char
+ elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
kind = :char
elsif scan(/ \\ . /x)
kind = :content
diff --git a/lib/coderay/styles/cycnus.rb b/lib/coderay/styles/cycnus.rb
index f1069b1..6d87db3 100644
--- a/lib/coderay/styles/cycnus.rb
+++ b/lib/coderay/styles/cycnus.rb
@@ -58,9 +58,11 @@ ol.CodeRay li { white-space: pre }
.ch .dl { color:#039 }
.cl { color:#B06; font-weight:bold }
+.cm { color:#A08; font-weight:bold }
.co { color:#036; font-weight:bold }
.cr { color:#0A0 }
.cv { color:#369 }
+.de { color:#B0B; }
.df { color:#099; font-weight:bold }
.di { color:#088; font-weight:bold }
.dl { color:black }
diff --git a/lib/coderay/token_classes.rb b/lib/coderay/token_classes.rb
index c71705b..ad7e5c8 100755
--- a/lib/coderay/token_classes.rb
+++ b/lib/coderay/token_classes.rb
@@ -15,8 +15,10 @@ module CodeRay
:class_variable => 'cv',
:color => 'cr',
:comment => 'c',
+ :complex => 'cm',
:constant => 'co',
:content => 'k',
+ :decorator => 'de',
:definition => 'df',
:delimiter => 'dl',
:directive => 'di',
@@ -31,6 +33,7 @@ module CodeRay
:function => 'fu',
:global_variable => 'gv',
:hex => 'hx',
+ :imaginary => 'cm',
:important => 'im',
:include => 'ic',
:inline => 'il',