summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2009-06-07 14:21:50 +0000
committer: murphy <murphy@rubychan.de> 2009-06-07 14:21:50 +0000
commit: 0db86ecbd49ce957d5d27ae98607e83aa95c951b (patch)
tree: e4d368635e2add0630512aface4739da09154368
parent: 3ece1d0ba395da1119bbcef3eb83fa7cdfa146b0 (diff)
download: coderay-0db86ecbd49ce957d5d27ae98607e83aa95c951b.tar.gz
Improved UTF-8 support for Ruby Scanner. Also fixed a minor bug.
* closes #108 (new Ruby 1.9 call operator syntax sugar)
* Added an example for Unicode code.
* automatic UTF-8 detection (experimental)
* There are still problems with different Ruby versions; the new Unicode test fails in Ruby 1.9 and JRuby.
-rw-r--r-- lib/coderay/scanners/ruby.rb 33
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb 8
-rw-r--r-- test/scanners/ruby/ruby19.expected.raydebug 4
-rw-r--r-- test/scanners/ruby/ruby19.in.rb 4
-rw-r--r-- test/scanners/ruby/unicode.expected.raydebug 30
-rw-r--r-- test/scanners/ruby/unicode.in.rb 30
6 files changed, 98 insertions, 11 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 721f0e4..b8cba97 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -21,6 +21,10 @@ module Scanners
file_extension 'rb'
helper :patterns
+
+ if not defined? EncodingError
+ EncodingError = Class.new Exception
+ end
private
def scan_tokens tokens, options
@@ -31,9 +35,10 @@ module Scanners
state = :initial
depth = nil
inline_block_stack = []
-
+ unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
+
patterns = Patterns # avoid constant lookup
-
+
until eos?
match = nil
kind = nil
@@ -161,7 +166,8 @@ module Scanners
elsif state == :initial
# IDENTS #
- if match = scan(/#{patterns::METHOD_NAME}/o)
+ if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
+ /#{patterns::METHOD_NAME}/o)
if last_token_dot
kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
else
@@ -175,7 +181,7 @@ module Scanners
## experimental!
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
- elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
+ elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
kind = :ident
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
@@ -281,13 +287,19 @@ module Scanners
else
kind = :error
- match = getch
+ match = (scan(/./mu) rescue nil) || getch
+ if !unicode && match.size > 1
+ unicode = true
+ unscan
+ next
+ end
end
elsif state == :def_expected
state = :initial
- if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
+ if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
+ /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
kind = :method
else
next
@@ -327,7 +339,14 @@ module Scanners
end
elsif state == :alias_expected
- if match = scan(/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
+ begin
+ match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
+ /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
+ rescue EncodingError
+ raise if $DEBUG
+ end
+
+ if match
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
tokens << [self[2], :space]
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index e303894..cf5f8c1 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -31,18 +31,18 @@ module Scanners
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
- IDENT = /[a-z_][\w_]*/i
+ IDENT = /[^\W\d]\w*/
METHOD_NAME = / #{IDENT} [?!]? /ox
METHOD_NAME_OPERATOR = /
\*\*? # multiplication and power
- | [-+~]@? # plus, minus, tilde with and without @
- | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system`
+ | [-+~]@? # plus, minus, tilde with and without at sign
+ | [\/%&|^`] # division, modulo or format strings, and, or, xor, system
| \[\]=? # array getter and setter
| << | >> # append or shift left, shift right
| <=?>? | >=? # comparison, rocket operator
| ===? | =~ # simple equality, case equality, match
- | ![~=@]? # negation with and without @, not-equal and not-match
+ | ![~=@]? # negation with and without at sign, not-equal and not-match
/ox
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
INSTANCE_VARIABLE = / @ #{IDENT} /ox
diff --git a/test/scanners/ruby/ruby19.expected.raydebug b/test/scanners/ruby/ruby19.expected.raydebug
new file mode 100644
index 0000000..f3e40d5
--- /dev/null
+++ b/test/scanners/ruby/ruby19.expected.raydebug
@@ -0,0 +1,4 @@
+ident(block)operator(.)ident(()operator(*)ident(arguments)operator(\)) comment(# bovi's example)
+
+reserved(def) operator(()ident(foo)operator(\))operator(.)ident(bar)
+reserved(end)
\ No newline at end of file
diff --git a/test/scanners/ruby/ruby19.in.rb b/test/scanners/ruby/ruby19.in.rb
new file mode 100644
index 0000000..45ec5f9
--- /dev/null
+++ b/test/scanners/ruby/ruby19.in.rb
@@ -0,0 +1,4 @@
+block.(*arguments) # bovi's example
+
+def (foo).bar
+end
\ No newline at end of file
diff --git a/test/scanners/ruby/unicode.expected.raydebug b/test/scanners/ruby/unicode.expected.raydebug
new file mode 100644
index 0000000..dad2a74
--- /dev/null
+++ b/test/scanners/ruby/unicode.expected.raydebug
@@ -0,0 +1,30 @@
+ident(ä) operator(=) integer(42)
+ident(print) ident(ä)
+
+reserved(def) method(straße)operator(()ident(frühstück)operator(\))
+ ident(höhle)operator(()ident(frühstück)operator(\))
+reserved(end)
+
+reserved(alias) method(λ) method(lambda)
+ident(×) operator(=) ident(λ)operator({) operator(|)ident(x)operator(,)ident(y)operator(|) ident(x)operator(*)ident(y)operator(})
+ident(×)operator([)integer(2)operator(,)integer(3)operator(]) comment(# => 6)
+
+comment(# Summe der ersten 10 Quadratzahlen)
+reserved(def) method(∑) ident(enum)
+ ident(enum)operator(.)ident(inject)operator(()integer(0)operator(\)) operator({) operator(|)ident(sum)operator(,) ident(x)operator(|) ident(sum) operator(+) reserved(yield)operator(()ident(x)operator(\)) operator(})
+reserved(end)
+
+ident(∑)operator(()integer(1)operator(..)integer(10)operator(\)) operator({) operator(|)ident(x)operator(|) ident(x)operator(**)integer(2) operator(}) comment(# => 385)
+
+comment(# mehr Mathematische Zeichen)
+reserved(def) method(∞)operator(;) float(1.0) operator(/) float(0.0)operator(;) reserved(end)
+reserved(def) method(π)operator(;) constant(Math)operator(::)constant(PI)operator(;) reserved(end)
+
+operator(-)ident(∞) operator(..) integer(2)operator(*)ident(π) comment(# => -Infinity..6.28318530717959)
+
+comment(# Azumanga Daioh Insider)
+reserved(class) operator(<<) class(Osaka) operator(=) constant(Object)operator(.)ident(new)
+ reserved(def) method(ぁ!)
+ ident(sleep) ident(∞)
+ reserved(end)
+reserved(end)
\ No newline at end of file
diff --git a/test/scanners/ruby/unicode.in.rb b/test/scanners/ruby/unicode.in.rb
new file mode 100644
index 0000000..5474072
--- /dev/null
+++ b/test/scanners/ruby/unicode.in.rb
@@ -0,0 +1,30 @@
+ä = 42
+print ä
+
+def straße(frühstück)
+ höhle(frühstück)
+end
+
+alias λ lambda
+× = λ{ |x,y| x*y}
+×[2,3] # => 6
+
+# Summe der ersten 10 Quadratzahlen
+def ∑ enum
+ enum.inject(0) { |sum, x| sum + yield(x) }
+end
+
+∑(1..10) { |x| x**2 } # => 385
+
+# mehr Mathematische Zeichen
+def ∞; 1.0 / 0.0; end
+def π; Math::PI; end
+
+-∞ .. 2*π # => -Infinity..6.28318530717959
+
+# Azumanga Daioh Insider
+class << Osaka = Object.new
+ def ぁ!
+ sleep ∞
+ end
+end
\ No newline at end of file