1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
|
module CodeRay
module Scanners
# HTML Scanner
#
# Alias: +xhtml+
#
# See also: Scanners::XML
class HTML < Scanner
register_for :html
# Token kinds listed for this scanner under the "not lines of code" name.
# NOTE(review): presumably read by the lines-of-code counting elsewhere in
# the library; the consumer is not visible in this file.
# Frozen so the shared list cannot be modified by accident.
KINDS_NOT_LOC = [
  :comment, :doctype, :preprocessor,
  :tag, :attribute_name, :operator,
  :attribute_value, :string,
  :plain, :entity, :error,
].freeze # :nodoc:

# Names of the event handler attributes. The lookup table built from this
# list marks them as :script, so their quoted values are scanned as
# JavaScript. Frozen because that table is built once at load time and a
# later change to this array would silently have no effect.
EVENT_ATTRIBUTES = %w(
  onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
  oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
  ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange onemptied onended onerror onfocus
  onformchange onforminput onhashchange oninput oninvalid onkeydown
  onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
  onplay onplaying onpopstate onprogress onratechange onreadystatechange
  onredo onreset onresize onscroll onseeked onseeking onselect onshow
  onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
  onvolumechange onwaiting
).freeze
# Lookup table from attribute name to the kind of code its value contains.
# Every name in EVENT_ATTRIBUTES is registered with the value :script; the
# tokenizer compares the looked-up value against :script to decide whether a
# quoted attribute value is handed to the JavaScript scanner.
# NOTE(review): CaseIgnoring presumably makes the lookup case-insensitive and
# nil is presumably the value returned for unknown names - confirm in WordList.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)
# One or more word characters, dots, colons or hyphens. Used for attribute
# names and also for unquoted attribute values.
ATTR_NAME = /[\w.:-]+/ # :nodoc:
# The end of a tag: ">" optionally preceded by "/".
TAG_END = /\/?>/ # :nodoc:
# A single hexadecimal digit (either case).
HEX = /[0-9a-fA-F]/ # :nodoc:
# A character reference terminated by ";": either "&" followed by word
# characters, "&#" followed by decimal digits, or "&#x" followed by
# hexadecimal digits. Written in extended mode, so the line breaks inside
# the literal are not part of the pattern.
ENTITY = /
&
(?:
\w+
|
\#
(?:
\d+
|
x#{HEX}+
)
)
;
/ox # :nodoc:
# Maps the opening quote character of an attribute value to the pattern for
# a run of plain content inside that string. A run stops at "&", at the same
# quote character, at ">" and at a newline.
PLAIN_STRING_CONTENT = {
"'" => /[^&'>\n]+/,
'"' => /[^&">\n]+/,
} # :nodoc:
# Resets the scanner (via the superclass) and returns the HTML state machine
# to its starting point.
#
# All three pieces of state that scan_tokens may persist with :keep_state are
# cleared, including @in_tag, so that a reused scanner does not continue
# inside a stale special tag.
def reset
  super
  @state = :initial
  @plain_string_content = nil
  @in_tag = nil
end
protected
# Initializes the state that scan_tokens carries between runs: the state
# machine starts in :initial, with no pending string content pattern and no
# open special tag.
# NOTE(review): presumably called by the Scanner base class when the scanner
# is created; the caller is not visible in this file.
def setup
  @state = :initial
  @plain_string_content = @in_tag = nil
end
# Tokenizes +code+ as JavaScript, sending the resulting tokens to +encoder+.
#
# Does nothing (and returns nil) when +code+ is nil or empty. The JavaScript
# scanner is created on first use and cached in @java_script_scanner for
# later calls. No token group is opened here; the callers in scan_tokens wrap
# the call in an :inline group themselves.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
  @java_script_scanner.tokenize code, :tokens => encoder
end
# Tokenizes the rest of the input as HTML and sends the tokens to +encoder+.
#
# The method is a state machine with these states:
#
# :initial::                 outside of a tag (text, entities, comments, tags)
# :attribute::               inside an opening tag, before an attribute name
# :attribute_equal::         after an attribute name, an "=" may follow
# :attribute_value::         after "=", a quoted or unquoted value is expected
# :attribute_value_string::  inside a quoted attribute value
# :in_special_tag::          inside the content of a <script> element
#
# Options read here:
#
# :state::       state to start in; falls back to the stored @state
# :keep_state::  when truthy, the final state, the current string content
#                pattern and the current special tag are stored in instance
#                variables so that a later call can continue from there
#
# Returns +encoder+. Reports an unknown state through raise_inspect and
# raises a RuntimeError for an unknown special tag.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = @in_tag
  in_attribute = nil

  # When continuing inside a quoted attribute value, reopen the :string group
  # that is closed at the end of every run that stops in that state.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace becomes a :space token in every state except script content,
    # where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        # The "|.*" alternatives let an unterminated construct run to the end
        # of the input instead of failing to match.
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # The negative lookahead makes sure only a tag named exactly "script"
        # is captured as a special tag. Without it, "<scripts>" or
        # "<script-foo>" would be cut after "<script" and their content would
        # be scanned as JavaScript.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]
          if self[2]
            # The tag was closed immediately ("<script>" or "<p>").
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          # Remember whether this attribute holds script code (event handlers).
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # Attribute without a value: rescan the same position as :attribute.
          state = :attribute
          next
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Event handler: the whole quoted value is one :inline group
            # whose content is tokenized as JavaScript.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Read up to (not including) the matching quote or the end of input.
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          # Tag ended where a value was expected (e.g. "<script src=>").
          # Handle the tag end like the :attribute state does, so that the
          # content of a special tag is still scanned as such.
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        # Every character is handled by one of these branches: whitespace was
        # consumed above, plain_string_content takes everything except "&",
        # the delimiter, ">" and newline, and those are matched below.
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script wrapped in an HTML comment: the comment markers become
            # :comment tokens, the text between them is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to the closing tag (or the end of input) is code.
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
    @in_tag = in_tag
  end

  # Keep the token stream balanced when the input ends inside a string.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
end
end
end
|