summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/groovy.rb
blob: fd7fbd953d1fdd0691ae9a18e28e314d2b1c048d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
module CodeRay
module Scanners

  load :java
  
  # Scanner for Groovy.
  class Groovy < Java

    include Streamable
    register_for :groovy
    
    # Words that are classified as :keyword in addition to the kinds taken
    # over from the Java scanner (see IDENT_KIND below).
    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which scan_tokens expects a value. This matters for "/":
    # where a value is expected it opens a regexp, otherwise it is scanned as
    # an operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    # Identifiers that are classified as :local_variable.
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:
    
    # Identifier classification: a copy of Java::IDENT_KIND extended with the
    # Groovy keywords and magic variables declared above.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:
    
    # What may follow a backslash to form an escape sequence. scan_tokens
    # accepts ESCAPE and UNICODE_ESCAPE inside strings, and REGEXP_ESCAPE and
    # UNICODE_ESCAPE inside regexps. All three are written in extended (/x)
    # syntax and are interpolated into larger patterns.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE =  / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:
    
    # Maps an opening delimiter to the pattern that matches a run of plain
    # content inside that literal. Every pattern stops at a backslash and at
    # the literal's own closing delimiter; the single-line forms also stop at
    # a newline. The patterns for ", """ and / additionally stop at "$" so
    # that scan_tokens can handle $ident and ${...} separately, while ' and
    # ''' consume "$" as ordinary content.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:
    
  protected
    
    # Tokenizes the Groovy source held by this scanner.
    #
    # +tokens+ receives the output via <tt><<</tt>. Text tokens are
    # <tt>[text, kind]</tt> pairs; string, regexp and inline (<tt>$ident</tt>
    # and <tt>${...}</tt>) groups are bracketed by <tt>[:open, kind]</tt> and
    # <tt>[:close, kind]</tt> markers. Every group that was opened is closed
    # again, even when the input ends in the middle of a literal or of a
    # <tt>${...}</tt> block.
    #
    # +options+ is accepted for interface compatibility; this method does not
    # read it.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      # One [state, string_delimiter, paren_depth] entry per string whose
      # ${...} block is currently being scanned, innermost last.
      inline_block_stack = []
      # Brace nesting depth inside the innermost ${...} block (nil outside).
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      # Truthy where a value may start, so that a following "/" opens a
      # regexp instead of being scanned as an operator.
      value_expected = true

      # Triple-quoted strings are opened as a :string group, so they have to
      # be closed as a :string group as well.
      group_kind = lambda { |s| s == :multiline_string ? :string : s }

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            if match.index ?\n
              # A line break ends an import clause and a pending "def".
              import_clause = after_def = false
              value_expected ||= true
            end
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment

          elsif bol? && scan(/ \#!.* /x)
            kind = :doctype

          elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Anything after a dot is a member name, even if it looks like
              # a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" is a key unless the colon belongs to "? :".
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end

          elsif scan(/;/)
            import_clause = after_def = false
            value_expected = true
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            kind = :operator
            # Count braces inside ${...} so the matching "}" can be found.
            if !inline_block_stack.empty?
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            kind = :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                tokens << [match, :inline_delimiter]
                tokens << [:close, :inline]
                # Resume scanning the string that contained the block.
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            kind = :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lLgG]?/)
              kind = :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            tokens << [:open, :string]
            string_delimiter = match
            kind = :delimiter

          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            state = :string
            tokens << [:open, state]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\//))
            after_def = value_expected = false
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            kind = :annotation

          elsif scan(/\//)
            after_def = false
            value_expected = true
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :multiline_string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            tokens << [match, :delimiter]
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            tokens << [:close, group_kind.call(state)]
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are reported as :char;
            # every other escape is reported as plain content.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            tokens << [:open, :inline]
            tokens << ['$', :inline_delimiter]
            match = match[1..-1]
            tokens << [match, IDENT_KIND[match]]
            tokens << [:close, :inline]
            next
          elsif match = scan(/ \$ \{ /x)
            tokens << [:open, :inline]
            tokens << ['${', :inline_delimiter]
            # Remember the surrounding literal and scan the block as code.
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif scan(/ \$ /mx)
            kind = :content

          elsif scan(/ \\. /mx)
            kind = :content

          elsif scan(/ \\ | \n /x)
            # Stray backslash or line break: give up on this literal. The
            # group is closed with the kind it was opened with.
            tokens << [:close, group_kind.call(state)]
            kind = :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not assign +match+ rely on the scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token = match unless [:space, :comment, :doctype].include? kind

        tokens << [match, kind]

      end

      # Input ended inside a literal: close its group.
      if [:multiline_string, :string, :regexp].include? state
        tokens << [:close, group_kind.call(state)]
      end

      # Input ended inside one or more ${...} blocks: close each block and
      # the literal that contained it, innermost first.
      until inline_block_stack.empty?
        outer_state, = inline_block_stack.pop
        tokens << [:close, :inline]
        tokens << [:close, group_kind.call(outer_state)]
      end

      tokens
    end

  end

end
end