diff options
-rw-r--r-- | bench/bench.rb | 1 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 41 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 13 | ||||
-rw-r--r-- | test/scanners/coderay_suite.rb | 6 | ||||
-rw-r--r-- | test/scanners/ruby/1.expected.raydebug | 2 | ||||
-rw-r--r-- | test/scanners/ruby/evil.expected.raydebug | 76 | ||||
-rw-r--r-- | test/scanners/ruby/example.expected.raydebug | 2 | ||||
-rw-r--r-- | test/scanners/ruby/strange.expected.raydebug | 7 | ||||
-rw-r--r-- | test/scanners/ruby/strange.in.rb | 7 |
9 files changed, 92 insertions, 63 deletions
diff --git a/bench/bench.rb b/bench/bench.rb index c34c738..29e2e40 100644 --- a/bench/bench.rb +++ b/bench/bench.rb @@ -53,7 +53,6 @@ $dump_input = lang == 'dump' $dump_output = format == 'dump' require 'coderay/helpers/gzip_simple.rb' if $dump_input -MYDIR = File.dirname __FILE__ def here fn = nil return MYDIR unless fn File.join here, fn diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 1ad9d03..d15af0d 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -24,7 +24,7 @@ module Scanners private def scan_tokens tokens, options last_token_dot = false - fancy_allowed = regexp_allowed = true + value_expected = true heredocs = nil last_state = nil state = :initial @@ -68,7 +68,7 @@ module Scanners tokens << [modifiers, :modifier] unless modifiers.empty? end tokens << [:close, state.type] - fancy_allowed = regexp_allowed = false + value_expected = false state = state.next_state when '\\' @@ -93,7 +93,7 @@ module Scanners case peek(1)[0] when ?{ inline_block_stack << [state, depth, heredocs] - fancy_allowed = regexp_allowed = true + value_expected = true state = :initial depth = 1 tokens << [:open, :inline] @@ -123,7 +123,6 @@ module Scanners # {{{ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) - fancy_allowed = true case m = match[0] when ?\s, ?\t, ?\f match << scan(/\s*/) unless eos? or heredocs @@ -131,7 +130,7 @@ module Scanners when ?\n, ?\\ kind = :space if m == ?\n - regexp_allowed = true + value_expected = true # FIXME not quite true state = :initial if state == :undef_comma_expected end if heredocs @@ -145,9 +144,10 @@ module Scanners end when ?#, ?=, ?_ kind = :comment - regexp_allowed = true + value_expected = true else - raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + raise_inspect 'else-case _ reached, because case %p was + not handled' % [matched[0].chr], tokens end tokens << [match, kind] next @@ -167,13 +167,17 @@ module Scanners end end ## experimental! - fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+[%\/][^\s=]/) + value_expected = :set if + patterns::REGEXP_ALLOWED[match] or check(/#{patterns::VALUE_FOLLOWS}/o) + + elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o) + kind = :ident + value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) # OPERATORS # - elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or - (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + elsif not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x) if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ - regexp_allowed = fancy_allowed = :set + value_expected = :set end last_token_dot = :set if match == '.' or match == '::' kind = :operator @@ -200,7 +204,7 @@ module Scanners elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) kind = :instance_variable - elsif regexp_allowed and match = scan(/\//) + elsif value_expected and match = scan(/\//) tokens << [:open, :regexp] kind = :delimiter interpreted = true @@ -222,10 +226,10 @@ module Scanners end elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) - regexp_allowed = fancy_allowed = :set + value_expected = :set kind = :operator - elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) + elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o) indented = self[1] == '-' quote = self[3] delim = self[quote ? 4 : 2] @@ -237,7 +241,7 @@ module Scanners heredocs ||= [] # create heredocs if empty heredocs << heredoc - elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) + elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o) kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do raise_inspect 'Unknown fancy string: %%%p' % k, tokens end @@ -245,11 +249,11 @@ module Scanners state = patterns::StringState.new kind, interpreted, self[2] kind = :delimiter - elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) + elsif value_expected and match = scan(/#{patterns::CHARACTER}/o) kind = :integer elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) - regexp_allowed = fancy_allowed = :set + value_expected = :set kind = :operator elsif match = scan(/`/) @@ -326,8 +330,7 @@ module Scanners end # }}} - regexp_allowed = regexp_allowed == :set - fancy_allowed = fancy_allowed == :set + value_expected = value_expected == :set last_token_dot = last_token_dot == :set if $DEBUG and not kind diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 6023b21..51cdb95 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -127,6 +127,19 @@ module Scanners .*? (?: \Z | (?=^\#CODE) ) /mx + + # Checks for a valid value to follow. This enables + # fancy_allowed in method calls. + VALUE_FOLLOWS = / + \s+ + (?: + [%\/][^\s=] + | + <<-?\S + | + #{CHARACTER} + ) + /x RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo diff --git a/test/scanners/coderay_suite.rb b/test/scanners/coderay_suite.rb index 280e182..6a2725c 100644 --- a/test/scanners/coderay_suite.rb +++ b/test/scanners/coderay_suite.rb @@ -152,11 +152,11 @@ module CodeRay File.open(actual_filename, 'wb') { |f| f.write result } if ENV['diff'] diff = expected_filename.sub(/\.expected\..*/, '.debug.diff') - system "diff #{expected_filename} #{actual_filename} > #{diff}" - system "EDITOR #{diff}" + system "diff --text #{expected_filename} #{actual_filename} > #{diff}" + system "EDITOR #{diff}" if ENV['diffed'] end end - unless ENV['diff'] or ENV['noassert'] + unless ENV['noassert'] assert(ok, "Scan error: unexpected output") end else diff --git a/test/scanners/ruby/1.expected.raydebug b/test/scanners/ruby/1.expected.raydebug index 510e7c6..61e3dbb 100644 --- a/test/scanners/ruby/1.expected.raydebug +++ b/test/scanners/ruby/1.expected.raydebug @@ -17,7 +17,7 @@ reserved(module) class(Bytes) constant(FactorOfSuffix)operator([)ident(suff)operator(]) reserved(end) - reserved(def) constant(Bytes)operator(.)operator([]) ident(str) + reserved(def) constant(Bytes)operator(.)ident([]) ident(str) ident(n)operator(,) ident(fac) operator(=) ident(str) operator(/) regexp<delimiter(/)content((.+\)([A-Z]\))delimiter(/)> ident(n) operator(=) ident(n)operator(.)ident(to_i) ident(fac) operator(=) ident(factor_of_suffix) ident(fac) diff --git a/test/scanners/ruby/evil.expected.raydebug b/test/scanners/ruby/evil.expected.raydebug index b1e3231..5518de0 100644 --- a/test/scanners/ruby/evil.expected.raydebug +++ b/test/scanners/ruby/evil.expected.raydebug @@ -4,10 +4,10 @@ reserved(class) class(Class) reserved(end) reserved(end) comment(#def String(x\) x.to_s end #it's already built-in. duh!) -reserved(def) constant(String)operator(.)operator(*)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)ident(right)operator(]) reserved(end) -reserved(def) constant(String)operator(.)operator(<<)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:<<)operator(,)ident(right)operator(]) reserved(end) -reserved(def) constant(String)operator(.)operator(/)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:/)operator(,)ident(right)operator(]) reserved(end) -reserved(def) constant(String)operator(.)operator([])operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:[])operator(,)ident(right)operator(]) reserved(end) +reserved(def) constant(String)operator(.)ident(*)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)ident(right)operator(]) reserved(end) +reserved(def) constant(String)operator(.)ident(<<)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:<<)operator(,)ident(right)operator(]) reserved(end) +reserved(def) constant(String)operator(.)ident(/)operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:/)operator(,)ident(right)operator(]) reserved(end) +reserved(def) constant(String)operator(.)ident([])operator(()ident(right)operator(\)) operator([)pre_constant(self)operator(,)symbol(:[])operator(,)ident(right)operator(]) reserved(end) ident(p)operator(()constant(String)operator(::)constant(Class)operator(\)) ident(p)operator(()constant(String)operator(::) constant(Class)operator(\)) ident(p)operator(()constant(String) operator(::)constant(Class)operator(\)) @@ -78,15 +78,15 @@ pre_constant(false) operator(?) ident(P?)operator(:) ident(p8) ident(P?) symbol(:p8) pre_constant(false) operator(?) ident(P?) operator(:) ident(p8) -pre_constant(self)operator(.)operator([])symbol(:p8) -pre_constant(false) operator(?) pre_constant(self)operator(.)operator([])operator(:) ident(p8) -pre_constant(self)operator(.)operator([]) symbol(:p8) -pre_constant(false) operator(?) pre_constant(self)operator(.)operator([]) operator(:) ident(p8) +pre_constant(self)operator(.)ident([])symbol(:p8) +pre_constant(false) operator(?) pre_constant(self)operator(.)ident([])operator(:) ident(p8) +pre_constant(self)operator(.)ident([]) symbol(:p8) +pre_constant(false) operator(?) pre_constant(self)operator(.)ident([]) operator(:) ident(p8) -pre_constant(self)operator(.)operator(<=>)symbol(:p8) -pre_constant(false) operator(?) pre_constant(self)operator(.)operator(<=>)operator(:) ident(p8) -pre_constant(self)operator(.)operator(<=>) symbol(:p8) -pre_constant(false) operator(?) pre_constant(self)operator(.)operator(<=>) operator(:) ident(p8) +pre_constant(self)operator(.)ident(<=>)symbol(:p8) +pre_constant(false) operator(?) pre_constant(self)operator(.)ident(<=>)operator(:) ident(p8) +pre_constant(self)operator(.)ident(<=>) symbol(:p8) +pre_constant(false) operator(?) pre_constant(self)operator(.)ident(<=>) operator(:) ident(p8) pre_constant(self) operator(<=>)symbol(:p8) comment(#false ? self <=>: p8 #gives ruby indigestion) @@ -390,9 +390,9 @@ reserved(def) method(`)operator(()ident(s)operator(\)) reserved(end) reserved(end) -integer(69)operator(.)operator(`)operator(()string<delimiter(')content(what a world)delimiter(')>operator(\)) +integer(69)operator(.)ident(`)operator(()string<delimiter(')content(what a world)delimiter(')>operator(\)) -integer(79)operator(::)operator(`)operator(()string<delimiter(')content(what a word)delimiter(')>operator(\)) +integer(79)operator(::)ident(`)operator(()string<delimiter(')content(what a word)delimiter(')>operator(\)) ident(p) symbol(:`) @@ -402,8 +402,8 @@ ident(a)operator(=)integer(5) ident(p) ident(p) integer(+5) ident(p) ident(a) integer(+5) -reserved(def) pre_constant(nil)operator(.)operator(+)operator(()ident(x)operator(\)) operator(~)ident(x) reserved(end) -reserved(def) pre_constant(nil)operator(.)operator([])operator(()operator(*)ident(x)operator(\)) operator([)ident(x)operator(]) reserved(end) +reserved(def) pre_constant(nil)operator(.)ident(+)operator(()ident(x)operator(\)) operator(~)ident(x) reserved(end) +reserved(def) pre_constant(nil)operator(.)ident([])operator(()operator(*)ident(x)operator(\)) operator([)ident(x)operator(]) reserved(end) ident(p)operator(() ident(p) operator(+) integer(5) operator(\)) ident(p)operator(() ident(p) integer(+5) operator(\)) ident(p)operator(() ident(p)integer(+5) operator(\)) @@ -417,28 +417,28 @@ reserved(class) class(Foou) reserved(def) method([]) ident(x)operator(=)integer(-100)operator(,)operator(&)ident(y)operator(;) ident(p) ident(x)operator(;) integer(100) reserved(end) reserved(end) ident(a0)operator(=)integer(8) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator(!)pre_constant(false) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) operator(!)pre_constant(false) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator(~)integer(9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) operator(~)integer(9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])integer(-9) comment(#op) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])integer(+9) comment(#op) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) integer(-9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) integer(+9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator(<<)integer(9) comment(#op) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) string<delimiter(<<9)> comment(#value)string<content( +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator(!)pre_constant(false) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) operator(!)pre_constant(false) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator(~)integer(9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) operator(~)integer(9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])integer(-9) comment(#op) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])integer(+9) comment(#op) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) integer(-9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) integer(+9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator(<<)integer(9) comment(#op) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) string<delimiter(<<9)> comment(#value)string<content( foobar)delimiter( 9)> -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator(%)integer(9) comment(#op) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator(/)integer(9) comment(#op) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) string<delimiter(%()content(9)delimiter(\))> comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) operator(/)integer(9)operator(/) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])global_variable($9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])ident(a0) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) global_variable($9) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) ident(a0) comment(#value) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([])operator({)integer(9)operator(}) comment(#lambda (op\)) -ident(p) constant(Foou)operator(.)ident(new)operator(.)operator([]) operator({)integer(9)operator(}) comment(#lambda (op\)) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator(%)integer(9) comment(#op) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator(/)integer(9) comment(#op) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) string<delimiter(%()content(9)delimiter(\))> comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) regexp<delimiter(/)content(9)delimiter(/)> comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])global_variable($9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])ident(a0) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) global_variable($9) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) ident(a0) comment(#value) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([])operator({)integer(9)operator(}) comment(#lambda (op\)) +ident(p) constant(Foou)operator(.)ident(new)operator(.)ident([]) operator({)integer(9)operator(}) comment(#lambda (op\)) reserved(if) ident(p) reserved(then) ident(p) reserved(end) @@ -768,7 +768,7 @@ reserved(end) EOL)> reserved(def) method(add)operator(()operator(*)ident(args)operator(\)) - pre_constant(self)operator(.)operator(<<)operator(()operator(*)ident(args)operator(\)) + pre_constant(self)operator(.)ident(<<)operator(()operator(*)ident(args)operator(\)) reserved(end) @@ -841,7 +841,7 @@ reserved(def) method(yy)operator(;)reserved(yield) reserved(end) ident(block)operator(=)ident(proc)operator({)ident(p) string<delimiter(")content(blah blah)delimiter(")>operator(}) ident(yy) operator(&)ident(block) -ident(p)operator(()integer(1)operator(.)operator(+)integer(1)operator(\)) +ident(p)operator(()integer(1)operator(.)ident(+)integer(1)operator(\)) ident(p) ident(pppp) reserved(module) class(M66) diff --git a/test/scanners/ruby/example.expected.raydebug b/test/scanners/ruby/example.expected.raydebug index be68a13..e290d06 100644 --- a/test/scanners/ruby/example.expected.raydebug +++ b/test/scanners/ruby/example.expected.raydebug @@ -187,7 +187,7 @@ reserved(class) class(Set) ident(include) constant(Enumerable) comment(# Creates a new set containing the given objects.) - reserved(def) pre_constant(self)operator(.)operator([])operator(()operator(*)ident(ary)operator(\)) + reserved(def) pre_constant(self)operator(.)ident([])operator(()operator(*)ident(ary)operator(\)) ident(new)operator(()ident(ary)operator(\)) reserved(end) diff --git a/test/scanners/ruby/strange.expected.raydebug b/test/scanners/ruby/strange.expected.raydebug index b2f7d50..61a7cae 100644 --- a/test/scanners/ruby/strange.expected.raydebug +++ b/test/scanners/ruby/strange.expected.raydebug @@ -51,6 +51,13 @@ operator(%)constant(Quark) ident(dazu) operator(%) ident(abc) comment(# FIXME) +comment(# And here some special string cases) +ident(foo) operator(=) operator(%) ident(blah) comment(# comment here to ensure whitespace) +ident(foo)operator(()operator(%) ident(blah) operator(\)) +ident(foo) operator(<<) operator(%) ident(blah) comment(# stupid but has to work) +ident(foo) operator(=) operator(%) ident(blah) operator(+) operator(%) ident(blub) comment(# wicked) +ident(foo) operator(=) string<delimiter(%q )content(wicked)delimiter( )> comment(# works too) + symbol<delimiter(%s#)content(ruby allows strange)delimiter(#)>operator({)ident(constructs)operator(}) symbol<delimiter(%s#)content(ruby allows strange)delimiter(#)>global_variable($constructs) symbol<delimiter(%s#)content(ruby allows strange)delimiter(#)>class_variable(@@constructs) diff --git a/test/scanners/ruby/strange.in.rb b/test/scanners/ruby/strange.in.rb index 8369aaa..bf57322 100644 --- a/test/scanners/ruby/strange.in.rb +++ b/test/scanners/ruby/strange.in.rb @@ -51,6 +51,13 @@ puts 30.send(:/, 5) # prints 6 % abc # FIXME
+# And here some special string cases
+foo = % blah # comment here to ensure whitespace
+foo(% blah )
+foo << % blah # stupid but has to work
+foo = % blah + % blub # wicked
+foo = %q wicked # works too
+
%s#ruby allows strange#{constructs}
%s#ruby allows strange#$constructs
%s#ruby allows strange#@@constructs
|