summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/groovy.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners/groovy.rb')
-rw-r--r--lib/coderay/scanners/groovy.rb247
1 files changed, 247 insertions, 0 deletions
diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb
new file mode 100644
index 0000000..5e76357
--- /dev/null
+++ b/lib/coderay/scanners/groovy.rb
@@ -0,0 +1,247 @@
+module CodeRay
+module Scanners
+
+  load :java
+
+  # Scanner for Groovy source, layered on the Java scanner. IDENT, TYPES and
+  # DIRECTIVES are not defined here; presumably they are supplied by Java.
+  class Groovy < Java
+    include Streamable
+    register_for :groovy
+
+    # TODO: Check this!
+    KEYWORDS = Java::KEYWORDS + %w[
+      def assert as in
+    ]
+    # Keywords after which a value is expected, so that a following "/" opens a regexp.
+    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+      case instanceof new return throw typeof while as assert in
+    ]
+
+    MAGIC_VARIABLES = Java::MAGIC_VARIABLES + %w[ it ]
+    # DIRECTIVES = %w[
+    #   abstract extends final implements native private protected public
+    #   static strictfp synchronized threadsafe throws transient volatile
+    # ]
+
+    # Token kind per known word; :ident is presumably the default for unknown words.
+    IDENT_KIND = WordList.new(:ident).
+      add(KEYWORDS, :keyword).
+      add(MAGIC_VARIABLES, :local_variable).
+      add(TYPES, :type).
+      add(BuiltinTypes::List, :pre_type).
+      add(DIRECTIVES, :directive)
+
+    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # no 4-byte unicode chars? U[a-fA-F0-9]{8}
+    REGEXP_ESCAPE = / [bBdDsSwW] /x
+    # Plain-content pattern, keyed by the delimiter that opened the literal.
+    STRING_CONTENT_PATTERN = {
+      "'" => /[^\\$'\n]+/,
+      '"' => /[^\\$"\n]+/,
+      "'''" => /(?>[^\\$']+|'(?!''))+/,
+      '"""' => /(?>[^\\$"]+|"(?!""))+/,
+      '/' => /[^\\$\/\n]+/,
+    }
+
+    # Scans the input until eos? and appends [text, kind] tokens plus
+    # [:open, kind] / [:close, kind] group markers to +tokens+.
+    # Returns +tokens+; +options+ is not read by this method.
+    def scan_tokens tokens, options
+      state = :initial
+      string_delimiter = nil
+      import_clause = class_name_follows = last_token_dot = after_def = false
+      value_expected = true
+
+      until eos?
+        kind = nil
+        match = nil
+        case state
+        when :initial
+          if match = scan(/ \s+ | \\\n /x)
+            tokens << [match, :space]
+            if match.index ?\n
+              import_clause = after_def = false
+              value_expected = true
+            end
+            next
+
+          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+            value_expected = true
+            after_def = false
+            kind = :comment
+
+          elsif bol? && scan(/ \#!.* /x)
+            kind = :doctype
+
+          elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
+            after_def = value_expected = false
+            kind = :include
+
+          elsif match = scan(/ #{IDENT} | \[\] /ox)
+            kind = IDENT_KIND[match]
+            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+            if last_token_dot
+              kind = :ident
+            elsif class_name_follows
+              kind = :class
+              class_name_follows = false
+            elsif after_def && check(/\s*[({]/)
+              kind = :method
+              after_def = false
+            elsif kind == :ident && check(/:/)
+              kind = :key
+            else
+              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
+              import_clause = match == 'import'
+              after_def = true if match == 'def'
+            end
+
+          # TODO: ~'...', ~"..." and ~/.../ style regexps
+          elsif scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
+              && | \|\| | \*\*=? | ==?~ | [-+*%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
+            value_expected = true
+            after_def = false
+            kind = :operator
+
+          elsif scan(/ [)\]}]+ /x)
+            value_expected = after_def = false
+            kind = :operator
+
+          elsif scan(/;/)
+            import_clause = after_def = false
+            value_expected = true
+            kind = :operator
+
+          elsif scan(/\{/)
+            class_name_follows = after_def = false
+            value_expected = true
+            kind = :operator
+
+          elsif check(/[\d.]/)
+            after_def = value_expected = false
+            if scan(/0[xX][0-9A-Fa-f]+/)
+              kind = :hex
+            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
+              kind = :oct
+            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
+              kind = :float
+            elsif scan(/\d+[lLgG]?/)
+              kind = :integer
+            end
+
+          elsif match = scan(/'''|"""/)
+            after_def = value_expected = false
+            state = :multiline_string
+            tokens << [:open, :string]
+            string_delimiter = match
+            kind = :delimiter
+
+          elsif match = scan(/["']/)
+            after_def = value_expected = false
+            # Always :string: the pattern above cannot match "/" (regexps open below).
+            state = :string
+            tokens << [:open, :string]
+            string_delimiter = match
+            kind = :delimiter
+
+          elsif value_expected && (match = scan(/\/(?=\S)/))
+            after_def = value_expected = false
+            tokens << [:open, :regexp]
+            state = :regexp
+            string_delimiter = '/'
+            kind = :delimiter
+
+          elsif scan(/ @ #{IDENT} /ox)
+            after_def = value_expected = false
+            kind = :annotation
+
+          elsif scan(/\//)
+            after_def = false
+            value_expected = true
+            kind = :operator
+
+          else
+            getch
+            kind = :error
+          end
+
+        when :string, :regexp, :multiline_string
+          if scan(STRING_CONTENT_PATTERN[string_delimiter])
+            kind = :content
+          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
+            tokens << [match, :delimiter]
+            if state == :regexp
+              modifiers = scan(/[ix]+/)
+              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
+            end
+            state = :string if state == :multiline_string
+            tokens << [:close, state]
+            string_delimiter = nil
+            after_def = value_expected = false
+            state = :initial
+            next
+
+          elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+              kind = :content
+            else
+              kind = :char
+            end
+          elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+            kind = :char
+
+          elsif match = scan(/ \$ #{IDENT} /mox)
+            tokens << [:open, :inline]
+            tokens << ['$', :inline_delimiter]
+            match = match[1..-1]
+            tokens << [match, IDENT_KIND[match]]
+            tokens << [:close, :inline]
+            next
+          elsif match = scan(/ \$ \{ [^}]* \} /mox)
+            # TODO: recursive inline strings
+            tokens << [:open, :inline]
+            tokens << ['${', :inline_delimiter]
+            tokens << [match[2..-2], :ident]
+            tokens << ['}', :inline_delimiter]
+            tokens << [:close, :inline]
+            next
+
+          elsif scan(/ \\. | \$ /mx)
+            kind = :content
+          elsif scan(/ \\ | $ /x)
+            # Close with the kind the group was opened with (:regexp or :string),
+            # not :delimiter, so that every :open marker has a matching :close.
+            tokens << [:close, state == :regexp ? :regexp : :string]
+            kind = :error
+            after_def = value_expected = false
+            state = :initial
+          else
+            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+          end
+        else
+          raise_inspect 'Unknown state', tokens
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens
+        end
+        raise_inspect 'Empty token', tokens unless match
+
+        last_token_dot = match == '.'
+        tokens << [match, kind]
+      end
+
+      # Input ended inside a literal: close its group. Multi-line strings are
+      # opened as :string (see above), so they are closed as :string as well.
+      if [:string, :regexp, :multiline_string].include? state
+        tokens << [:close, state == :regexp ? :regexp : :string]
+      end
+
+      tokens
+    end
+  end
+end
+end