summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/io.rb
blob: 22f50cc4129d96d8f6f24cb7c789ca1b68579425 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
module CodeRay module Scanners

	# A simple scanner for a simple language: Io.
	#
	# Splits Io source into [text, kind] pairs appended to a token list.
	# String literals are wrapped in [:open, :string] / [:close, :string]
	# marker pairs. Input that matches no rule is emitted as an :error token
	# instead of aborting the scan.
	#
	# NOTE(review): the scanning primitives used below (scan, check, getch,
	# eos?, matched, peek, self[n]) are presumably inherited from Scanner --
	# confirm against the base class.
	class Io < Scanner

		register_for :io

		# Words reported with kind :reserved.
		RESERVED_WORDS = [
			'clone', 'init', 'method', 'list', 'vector', 'block',
			'if', 'ifTrue', 'ifFalse', 'ifTrueIfFalse', 'then', 'for', 'loop',
			'reverseForeach', 'foreach', 'map', 'continue', 'break', 'while', 'do', 'return',
			'self', 'sender', 'target', 'proto', 'parent', 'protos'
		]

		# Words reported with kind :pre_type (currently none).
		PREDEFINED_TYPES = []

		# Words reported with kind :pre_constant.
		PREDEFINED_CONSTANTS = [
			'Object', 'Lobby',
			'TRUE', 'true', 'FALSE', 'false', 'NULL', 'null', 'Null', 'Nil', 'nil', 'YES', 'NO'
		]

		# Maps an identifier to its token kind; anything not listed is :ident.
		IDENT_KIND = WordList.new(:ident).
			add(RESERVED_WORDS, :reserved).
			add(PREDEFINED_TYPES, :pre_type).
			add(PREDEFINED_CONSTANTS, :pre_constant)

		# What may follow a backslash in a character or string literal.
		ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
		# Additional \uXXXX and \UXXXXXXXX escapes accepted inside strings.
		UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

		# Scans the remaining input and appends the resulting tokens to +tokens+.
		#
		# tokens  - collection supporting <<; receives [text, kind] pairs and
		#           the [:open, :string] / [:close, :string] markers.
		# options - not used by this scanner.
		#
		# Returns +tokens+.
		# Raises RuntimeError only if an internal state that should be
		# unreachable is hit.
		def scan_tokens(tokens, options)

			state = :initial

			until eos?

				# Every rule below overrides this; input that no rule accepts
				# stays :error and is emitted as such.
				kind = :error
				match = nil

				case state
				when :initial

					if scan(/ \s+ | \\\n /x)
						kind = :space

					elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
						kind = :comment

					elsif scan(/ [-+*\/\$\@=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
						kind = :operator

					elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
						kind = IDENT_KIND[match]
						# A trailing single colon turns an identifier into a label.
						# ":=" is Io's assignment operator and "::" is excluded as
						# before, so neither may be swallowed here.
						if kind == :ident && check(/:(?![:=])/)
							match << scan(/:/)
							kind = :label
						end

					elsif match = scan(/L?"/)
						tokens << [:open, :string]
						if match[0] == ?L
							tokens << ['L', :modifier]
							match = '"'
						end
						state = :string
						kind = :delimiter

					elsif scan(/#\s*(\w*)/)
						# NOTE(review): Io appears to treat "#" as a line comment;
						# this rule handles it as a preprocessor directive -- confirm
						# the intended behaviour before changing it.
						kind = :preprocessor  # FIXME multiline preprocs
						state = :include_expected if self[1] == 'include'

					elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
						kind = :char

					elsif scan(/0[xX][0-9A-Fa-f]+/)
						kind = :hex

					# Floats are tried before integers so that "12.5" is a single
					# token rather than being split at a backtracking boundary.
					elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]/)
						kind = :float

					# The lookahead rejects any following digit so the match cannot
					# backtrack into a shorter octal prefix (e.g. "01" out of "0178").
					elsif scan(/0[0-7]+(?![\d.eEfF])/)
						kind = :oct

					elsif scan(/\d+/)
						kind = :integer

					else
						# Unknown character: consume it and emit it as :error.
						getch
					end

				when :string
					if scan(/[^\\"]+/)
						kind = :content
					elsif scan(/"/)
						tokens << ['"', :delimiter]
						tokens << [:close, :string]
						state = :initial
						next
					elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
						kind = :char
					elsif scan(/ \\ | $ /x)
						# Invalid escape: close the string group so the open/close
						# markers stay balanced, then resume normal scanning.
						tokens << [:close, :string]
						kind = :error
						state = :initial
					else
						raise "else case \" reached; %p not handled." % peek(1)
					end

				when :include_expected
					if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
						kind = :include
						state = :initial

					elsif match = scan(/\s+/)
						kind = :space
						# The directive ends with its line.
						state = :initial if match.index ?\n

					else
						getch

					end

				else
					raise 'else-case reached'

				end

				# Rules that did not capture their text rely on the last match.
				match ||= matched

				tokens << [match, kind]

			end

			# Input ended inside a string literal: balance the open marker.
			tokens << [:close, :string] if state == :string

			tokens
		end

	end

end end