diff options
author | murphy <murphy@rubychan.de> | 2009-06-07 14:21:50 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2009-06-07 14:21:50 +0000 |
commit | 0db86ecbd49ce957d5d27ae98607e83aa95c951b (patch) | |
tree | e4d368635e2add0630512aface4739da09154368 | |
parent | 3ece1d0ba395da1119bbcef3eb83fa7cdfa146b0 (diff) | |
download | coderay-0db86ecbd49ce957d5d27ae98607e83aa95c951b.tar.gz |
Improved UTF-8 support for Ruby Scanner. Also fixed a minor bug.
* closes #108 (new Ruby 1.9 call operator syntax sugar)
* Added an example of Unicode source code.
* automatic UTF-8 detection (experimental)
* There are still problems with different Ruby versions; the new Unicode test fails in
Ruby 1.9 and JRuby.
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 33 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 8 | ||||
-rw-r--r-- | test/scanners/ruby/ruby19.expected.raydebug | 4 | ||||
-rw-r--r-- | test/scanners/ruby/ruby19.in.rb | 4 | ||||
-rw-r--r-- | test/scanners/ruby/unicode.expected.raydebug | 30 | ||||
-rw-r--r-- | test/scanners/ruby/unicode.in.rb | 30 |
6 files changed, 98 insertions, 11 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 721f0e4..b8cba97 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -21,6 +21,10 @@ module Scanners file_extension 'rb' helper :patterns + + if not defined? EncodingError + EncodingError = Class.new Exception + end private def scan_tokens tokens, options @@ -31,9 +35,10 @@ module Scanners state = :initial depth = nil inline_block_stack = [] - + unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8' + patterns = Patterns # avoid constant lookup - + until eos? match = nil kind = nil @@ -161,7 +166,8 @@ module Scanners elsif state == :initial # IDENTS # - if match = scan(/#{patterns::METHOD_NAME}/o) + if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo : + /#{patterns::METHOD_NAME}/o) if last_token_dot kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end else @@ -175,7 +181,7 @@ module Scanners ## experimental! value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) - elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o) + elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o) kind = :ident value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) @@ -281,13 +287,19 @@ module Scanners else kind = :error - match = getch + match = (scan(/./mu) rescue nil) || getch + if !unicode && match.size > 1 + unicode = true + unscan + next + end end elsif state == :def_expected state = :initial - if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) + if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : + /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) kind = :method else next @@ -327,7 +339,14 @@ module Scanners end elsif state == :alias_expected - if match = scan(/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o) + begin + match = scan(unicode ? 
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo : + /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o) + rescue EncodingError + raise if $DEBUG + end + + if match tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)] tokens << [self[2], :space] tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)] diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index e303894..cf5f8c1 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -31,18 +31,18 @@ module Scanners add(RESERVED_WORDS, :reserved). add(PREDEFINED_CONSTANTS, :pre_constant) - IDENT = /[a-z_][\w_]*/i + IDENT = /[^\W\d]\w*/ METHOD_NAME = / #{IDENT} [?!]? /ox METHOD_NAME_OPERATOR = / \*\*? # multiplication and power - | [-+~]@? # plus, minus, tilde with and without @ - | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system` + | [-+~]@? # plus, minus, tilde with and without at sign + | [\/%&|^`] # division, modulo or format strings, and, or, xor, system | \[\]=? # array getter and setter | << | >> # append or shift left, shift right | <=?>? | >=? # comparison, rocket operator | ===? | =~ # simple equality, case equality, match - | ![~=@]? # negation with and without @, not-equal and not-match + | ![~=@]? # negation with and without at sign, not-equal and not-match /ox METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox INSTANCE_VARIABLE = / @ #{IDENT} /ox diff --git a/test/scanners/ruby/ruby19.expected.raydebug b/test/scanners/ruby/ruby19.expected.raydebug new file mode 100644 index 0000000..f3e40d5 --- /dev/null +++ b/test/scanners/ruby/ruby19.expected.raydebug @@ -0,0 +1,4 @@ +ident(block)operator(.)ident(()operator(*)ident(arguments)operator(\)) comment(# bovi's example) + +reserved(def) operator(()ident(foo)operator(\))operator(.)ident(bar) +reserved(end)
\ No newline at end of file diff --git a/test/scanners/ruby/ruby19.in.rb b/test/scanners/ruby/ruby19.in.rb new file mode 100644 index 0000000..45ec5f9 --- /dev/null +++ b/test/scanners/ruby/ruby19.in.rb @@ -0,0 +1,4 @@ +block.(*arguments) # bovi's example + +def (foo).bar +end
\ No newline at end of file diff --git a/test/scanners/ruby/unicode.expected.raydebug b/test/scanners/ruby/unicode.expected.raydebug new file mode 100644 index 0000000..dad2a74 --- /dev/null +++ b/test/scanners/ruby/unicode.expected.raydebug @@ -0,0 +1,30 @@ +ident(ä) operator(=) integer(42) +ident(print) ident(ä) + +reserved(def) method(straße)operator(()ident(frühstück)operator(\)) + ident(höhle)operator(()ident(frühstück)operator(\)) +reserved(end) + +reserved(alias) method(λ) method(lambda) +ident(×) operator(=) ident(λ)operator({) operator(|)ident(x)operator(,)ident(y)operator(|) ident(x)operator(*)ident(y)operator(}) +ident(×)operator([)integer(2)operator(,)integer(3)operator(]) comment(# => 6) + +comment(# Summe der ersten 10 Quadratzahlen) +reserved(def) method(∑) ident(enum) + ident(enum)operator(.)ident(inject)operator(()integer(0)operator(\)) operator({) operator(|)ident(sum)operator(,) ident(x)operator(|) ident(sum) operator(+) reserved(yield)operator(()ident(x)operator(\)) operator(}) +reserved(end) + +ident(∑)operator(()integer(1)operator(..)integer(10)operator(\)) operator({) operator(|)ident(x)operator(|) ident(x)operator(**)integer(2) operator(}) comment(# => 385) + +comment(# mehr Mathematische Zeichen) +reserved(def) method(∞)operator(;) float(1.0) operator(/) float(0.0)operator(;) reserved(end) +reserved(def) method(π)operator(;) constant(Math)operator(::)constant(PI)operator(;) reserved(end) + +operator(-)ident(∞) operator(..) integer(2)operator(*)ident(π) comment(# => -Infinity..6.28318530717959) + +comment(# Azumanga Daioh Insider) +reserved(class) operator(<<) class(Osaka) operator(=) constant(Object)operator(.)ident(new) + reserved(def) method(ぁ!) + ident(sleep) ident(∞) + reserved(end) +reserved(end)
\ No newline at end of file diff --git a/test/scanners/ruby/unicode.in.rb b/test/scanners/ruby/unicode.in.rb new file mode 100644 index 0000000..5474072 --- /dev/null +++ b/test/scanners/ruby/unicode.in.rb @@ -0,0 +1,30 @@ +ä = 42 +print ä + +def straße(frühstück) + höhle(frühstück) +end + +alias λ lambda +× = λ{ |x,y| x*y} +×[2,3] # => 6 + +# Summe der ersten 10 Quadratzahlen +def ∑ enum + enum.inject(0) { |sum, x| sum + yield(x) } +end + +∑(1..10) { |x| x**2 } # => 385 + +# mehr Mathematische Zeichen +def ∞; 1.0 / 0.0; end +def π; Math::PI; end + +-∞ .. 2*π # => -Infinity..6.28318530717959 + +# Azumanga Daioh Insider +class << Osaka = Object.new + def ぁ! + sleep ∞ + end +end
\ No newline at end of file |