1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
module CodeRay
module Scanners
# Scanner for JavaScript.
#
# Aliases: +ecmascript+, +ecma_script+, +javascript+
class JavaScript5 < RuleBasedScanner
register_for :java_script5
file_extension 'js'
# The actual JavaScript keywords.
# Frozen so that the list, which is shared by every scanner instance,
# cannot be modified by accident after the file has been loaded.
KEYWORDS = %w[
  break case catch continue default delete do else
  finally for function if in instanceof new
  return switch throw try typeof var void while with
].freeze # :nodoc:
# Literal value names (booleans, null, undefined and the numeric specials).
PREDEFINED_CONSTANTS = %w[
  false null true undefined NaN Infinity
].freeze # :nodoc:
# Names that are implicitly available inside code; +arguments+ was
# introduced in JavaScript 1.4.
MAGIC_VARIABLES = %w[this arguments].freeze # :nodoc:
# Keywords that are looked up after an identifier has been scanned to decide
# whether a value is expected next.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(
  %w[case delete in instanceof new return throw typeof with]
) # :nodoc:
# Reserved for future use.
# Frozen so that the list, which is shared by every scanner instance,
# cannot be modified by accident after the file has been loaded.
RESERVED_WORDS = %w[
  abstract boolean byte char class debugger double enum export extends
  final float goto implements import int interface long native package
  private protected public short static super synchronized throws transient
  volatile
].freeze # :nodoc:
# Lookup table from identifier text to token kind. Words that are not listed
# presumably yield :ident, the value handed to WordList.new (confirm against
# WordList).
IDENT_KIND = WordList.new(:ident)
  .add(RESERVED_WORDS, :reserved)
  .add(PREDEFINED_CONSTANTS, :predefined_constant)
  .add(MAGIC_VARIABLES, :local_variable)
  .add(KEYWORDS, :keyword) # :nodoc:
# What may follow a backslash: a single-character escape, a hexadecimal
# escape (x plus one or two hex digits) or an octal escape (one to three
# octal digits).
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
# Unicode escapes: u plus four hex digits, or U plus eight hex digits.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
# Character-class letters that may follow a backslash inside a regexp.
REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
# Per-delimiter pattern for a run of literal content: everything up to the
# next backslash or closing delimiter. Frozen because it is a shared lookup
# table that is only ever read.
STRING_CONTENT_PATTERN = {
  "'" => /[^\\']+/,
  '"' => /[^\\"]+/,
  '/' => /[^\\\/]+/,
}.freeze # :nodoc:
# Per-quote pattern for the remainder of a string literal (escapes included),
# its closing quote, optional whitespace and a colon. Frozen because it is a
# shared lookup table that is only ever read.
KEY_CHECK_PATTERN = {
  "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
  '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
}.freeze # :nodoc:
# Rules for the :initial state, i.e. everything outside of string, key and
# regexp literals. The rules are listed in the order they are meant to be
# tried.
# NOTE(review): the meaning of the DSL helpers used below (on, on?, set,
# flag_on, flag_off, check_if, push, kind) is inferred from their names and
# from how they are used here - confirm against RuleBasedScanner.
state :initial do
# Whitespace, or a backslash followed by a newline. value_expected keeps its
# value if it is already set; otherwise it becomes truthy only when the
# matched text contains a newline.
on %r/ \s+ | \\\n /x, :space, set(:value_expected) { |match, value_expected| value_expected || match.index(?\n) }
# Line comments (//) and block comments (/* ... */). A block comment without
# a closing */ matches through the end of the input; the empty group 1
# participates only in that case.
on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, flag_off(:value_expected)
# state = :open_multi_line_comment if self[1]
# Numeric literals. The nested rules are grouped behind a pattern for an
# optional dot followed by a digit (presumably a guard that is checked
# without consuming input - TODO confirm what on? does).
on? %r/\.?\d/ do
on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:key_expected, :value_expected)
# A leading 0 followed by octal digits, unless an 8, 9, dot, exponent or
# float suffix follows.
on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, flag_off(:key_expected, :value_expected)
on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, flag_off(:key_expected, :value_expected)
on %r/\d+/, :integer, flag_off(:key_expected, :value_expected)
end
# Where a value is expected, text of the form <tag .../> or <tag>...</tag> is
# passed to the XML scanner, which writes its tokens to the same encoder.
on check_if(:value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
# TODO: scan over nested tags
xml_scanner.tokenize match, :tokens => encoder
end, flag_off(:value_expected)
# A run of operator characters that does not end in { or , - or a run of dots
# not followed by a digit.
on %r/ [-+*=<>?:;,!&^|(\[{~%]++ (?<![{,]) | \.+(?!\d) /x, :operator, flag_on(:value_expected), flag_off(:key_expected, :function_expected)
# A run of operator characters that ends in { or , - an object key may follow,
# so key_expected is switched on as well.
# NOTE(review): the *+ quantifier would allow an empty match here; whether
# that can happen depends on whether the lookbehind is able to see input that
# has already been consumed - confirm against the scanner's StringScanner use.
on %r/ [-+*=<>?:;,!&^|(\[{~%]*+ (?<=[{,]) /x, :operator, flag_on(:value_expected, :key_expected), flag_off(:function_expected)
# Closing brackets.
on %r/ [)\]}]+ /x, :operator, flag_off(:function_expected, :key_expected, :value_expected)
# The function keyword; the identifier after it is reported as :function.
on %r/ function (?![A-Za-z_0-9$]) /x, :keyword, flag_on(:function_expected), flag_off(:key_expected, :value_expected)
# Identifiers. The kind comes from IDENT_KIND; a plain :ident is refined
# further by the checks below, tried in the order written.
on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, kind { |match, function_expected, key_expected|
kind = IDENT_KIND[match]
# TODO: labels
if kind == :ident
if match.index(?$) # $ allowed inside an identifier
kind = :predefined
elsif function_expected
kind = :function
# Identifier that is assigned, or mapped to, a function expression.
elsif check(/\s*[=:]\s*function\b/)
kind = :function
# Identifier followed by a colon where an object key is expected.
elsif key_expected && check(/\s*:/)
kind = :key
end
end
kind
}, flag_off(:function_expected, :key_expected), set(:value_expected) { |match| KEYWORDS_EXPECTING_VALUE[match] }
# Opening quote. The new state is :key when a key is expected and the rest of
# the literal is followed by a colon, otherwise :string. The quote character
# is stored as string_delimiter.
on %r/["']/, push { |match, key_expected| key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string }, :delimiter, set(:string_delimiter) { |match| match }
# A slash opens a regexp literal where a value is expected ...
on check_if(:value_expected), %r/\//, push(:regexp), :delimiter
# ... and is an ordinary operator everywhere else.
on %r/\//, :operator, flag_on(:value_expected), flag_off(:key_expected)
end
# Rules shared by the :string and :key states, i.e. the inside of a quoted
# literal.
# NOTE(review): the meaning of the DSL helpers used below (on, pattern, kind,
# unset, flag_off, pop) is inferred from their names and from how they are
# used here - confirm against RuleBasedScanner.
state :string, :key do
# Literal content: the pattern is looked up by the current string_delimiter
# and stops before a backslash or that delimiter.
on pattern { |string_delimiter| STRING_CONTENT_PATTERN[string_delimiter] }, :content
# Closing quote: forget the delimiter and leave the state.
on %r/["']/, :delimiter, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop
# Known escape sequences. With a single-quote delimiter only \\ and \' are
# reported as :char and every other escape as :content; with any other
# delimiter all of them are :char.
on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, kind { |match, string_delimiter|
string_delimiter == "'" && !(match == "\\\\" || match == "\\'") ? :content : :char
}
# A backslash followed by any other character (newline included, because of
# the m flag).
on %r/ \\. /mx, :content
# A backslash that none of the rules above matched: reported as :error, and
# the state is left.
on %r/ \\ /x, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop, :error
end
# Rules for the :regexp state, i.e. the inside of a /.../ literal.
# NOTE(review): the meaning of the DSL helpers used below (on, groups,
# flag_off, pop) is inferred from their names and from how they are used
# here - confirm against RuleBasedScanner.
state :regexp do
# Literal content up to the next backslash or slash.
on STRING_CONTENT_PATTERN['/'], :content
# Closing slash (group 1) with optional g, i and m modifiers (group 2).
on %r/(\/)([gim]+)?/, groups(:delimiter, :modifier), flag_off(:key_expected, :value_expected), pop
# Known escape sequences, including the regexp character-class letters.
on %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
# A backslash followed by any other character (newline included, because of
# the m flag).
on %r/\\./m, :content
# A backslash that none of the rules above matched: reported as :error, and
# the state is left.
on %r/ \\ /x, pop, :error, flag_off(:key_expected, :value_expected)
end
# state :open_multi_line_comment do
# on %r! .*? \*/ !mx, :initial # don't consume!
# on %r/ .+ /mx, :comment, -> { value_expected = true }
#
# # if match = scan(%r! .*? \*/ !mx)
# # state = :initial
# # else
# # match = scan(%r! .+ !mx)
# # end
# # value_expected = true
# # encoder.text_token match, :comment if match
# end
protected
# Runs the parent class's setup and then puts the scanner flags into their
# starting values: no open string delimiter, a value is expected, and neither
# an object key nor a function name is expected.
def setup
  super
  @string_delimiter, @value_expected, @key_expected = nil, true, false
  @function_expected = false
end
# Ends the token group belonging to the last entry of +states+ when that
# entry is :string, :key or :regexp. Earlier entries are not examined.
#
# encoder:: object that receives +end_group+ with the state name
# states::  list of state names; only its last element is used
def close_groups(encoder, states)
  open_state = states.last
  case open_state
  when :string, :key, :regexp
    encoder.end_group open_state
  end
end
# Runs the parent class's reset and then resets the embedded XML scanner as
# well, provided one has been assigned to @xml_scanner.
def reset_instance
  super
  return unless defined?(@xml_scanner)

  @xml_scanner.reset
end
# Returns the scanner used for embedded XML, creating it on first use and
# reusing it afterwards. It is requested from CodeRay.scanner with this
# scanner's @tokens as its :tokens option.
def xml_scanner
  @xml_scanner ||= CodeRay.scanner(
    :xml,
    tokens: @tokens,
    keep_tokens: true,
    keep_state: false
  )
end
end
end
end
|