1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
module CodeRay
  # The Tokens class represents a list of tokens returned from
  # a Scanner. It's actually just an Array with a few helper methods.
  #
  # A token itself is not a special object, just two elements in an Array:
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # It looks like this:
  #
  #   ..., '# It looks like this', :comment, ...
  #   ..., '3.1415926', :float, ...
  #   ..., '$^', :error, ...
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, for example :begin_group and :end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #   [
  #     :begin_group, :string,
  #     '"',          :delimiter,
  #     'a string',   :content,
  #     '"',          :delimiter,
  #     :end_group,   :string
  #   ]
  #
  # Tokens can be used to save the output of a Scanner in a simple
  # Ruby object that can be sent to an Encoder later:
  #
  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
  #   tokens.encode(:html)
  #   tokens.html
  #   CodeRay.encoder(:html).encode_tokens(tokens)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can serialize it to a JSON string and store it in a database, pass it
  # around to encode it more than once, send it to other algorithms...
  class Tokens < Array
    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a plugin name like :html or 'statistic' (anything that responds to
    #   +to_sym+); it is looked up in Encoders and instantiated with +options+
    # * an Encoder object (anything responding to +encode_tokens+)
    #
    # options are passed to the encoder.
    #
    # Returns whatever the encoder's +encode_tokens+ returns.
    def encode encoder, options = {}
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by encoding them with a plain
    # CodeRay::Encoders::Encoder instance.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # If no encoder plugin of that name can be found
    # (PluginHost::PluginNotFound), the call is handed on to the default
    # +method_missing+, so a misspelled method name raises the usual
    # NoMethodError instead of leaking a plugin lookup error.
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # Once +sizes+ is exhausted, all remaining tokens go into one final
    # part. If fewer parts than +sizes+ were produced, the result is padded
    # with empty Tokens objects.
    #
    # Raises ArgumentError if the list contains an unknown token action or
    # something that is neither a String nor a Symbol in content position.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      # A single empty text token yields nothing but empty parts.
      # (+size+ is Array#size here.)
      return Array.new(sizes.size) { Tokens.new } if size == 2 && first == ''

      parts = []
      opened = []          # stack of [action, kind] pairs that are currently open
      content = nil        # first half (text or action) of the pair being read
      part = Tokens.new
      part_size = 0        # text size collected in the current part so far
      limit = sizes.first  # text size of the current part; nil means unlimited
      size_index = 0
      for item in self
        case content
        when nil
          # First half of a pair: remember it and wait for the kind.
          content = item
        when String
          if limit && part_size + content.size > limit  # token must be cut
            if part_size < limit  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, limit - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            # Finish the current part; requested sizes of zero (or less)
            # produce empty parts and are skipped.
            begin
              parts << part
              part = Tokens.new
              limit = sizes[size_index += 1]
            end until limit.nil? || limit > 0
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            # Re-run this iteration with the same kind (+item+) to place the
            # rest of the text, which may have to be cut again.
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Return the actual number of tokens.
    #
    # Every token occupies two Array slots (text or action, then kind),
    # so this is half of the Array size. Note that this replaces
    # Array#count and accepts neither an argument nor a block.
    def count
      size / 2
    end

    # Append a text token: +text_token text, kind+ pushes both elements.
    alias text_token push

    # Append the corresponding token action followed by +kind+.
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end

    # Append all elements of another token list.
    alias tokens concat
  end
end
|