| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
 | module CodeRay module Scanners
  
  # by Josh Goebel
  class SQL < Scanner
    register_for :sql
    
    KEYWORDS = %w(
      all and any as before begin between by case check collate
      each else end exists
      for foreign from full group having if in inner is join
      like not of on or order outer over references
      then to union using values when where
      left right distinct
    )
    
    OBJECTS = %w(
      database databases table tables column columns fields index constraint
      constraints transaction function procedure row key view trigger
    )
    
    COMMANDS = %w(
      add alter comment create delete drop grant insert into select update set
      show prompt begin commit rollback replace truncate
    )
    
    PREDEFINED_TYPES = %w(
      char varchar varchar2 enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )
    
    PREDEFINED_FUNCTIONS = %w( sum cast substring abs pi count min max avg now )
    
    DIRECTIVES = %w( 
      auto_increment unique default charset initially deferred
      deferrable cascade immediate read write asc desc after
      primary foreign return engine
    )
    
    PREDEFINED_CONSTANTS = %w( null true false )
    
    IDENT_KIND = WordList::CaseIgnoring.new(:ident).
      add(KEYWORDS, :keyword).
      add(OBJECTS, :type).
      add(COMMANDS, :class).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)
    
    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    
    STRING_PREFIXES = /[xnb]|_\w+/i
    
    def scan_tokens encoder, options
      
      state = :initial
      string_type = nil
      string_content = ''
      name_expected = false
      
      until eos?
        
        if state == :initial
          
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
          
          elsif match = scan(/(?:--\s?|#).*/)
            encoder.text_token match, :comment
            
          elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
            encoder.text_token match, self[1] ? :directive : :comment
            
          elsif match = scan(/ [*\/=<>:;,!&^|()\[\]{}~%] | [-+\.](?!\d) /x)
            name_expected = true if match == '.' && check(/[A-Za-z_]/)
            encoder.text_token match, :operator
            
          elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o)
            prefix = self[1]
            string_type = self[2]
            encoder.begin_group :string
            encoder.text_token prefix, :modifier if prefix
            match = string_type
            state = :string
            encoder.text_token match, :delimiter
            
          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match])
            name_expected = false
            
          elsif match = scan(/0[xX][0-9A-Fa-f]+/)
            encoder.text_token match, :hex
            
          elsif match = scan(/0[0-7]+(?![89.eEfF])/)
            encoder.text_token match, :octal
            
          elsif match = scan(/[-+]?(?>\d+)(?![.eEfF])/)
            encoder.text_token match, :integer
            
          elsif match = scan(/[-+]?(?:\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)/)
            encoder.text_token match, :float
          
          elsif match = scan(/\\N/)
            encoder.text_token match, :predefined_constant
            
          else
            encoder.text_token getch, :error
            
          end
          
        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next
          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              unless string_content.empty?
                encoder.text_token string_content, :content
                string_content = ''
              end
              encoder.text_token match, :delimiter
              encoder.end_group :string
              state = :initial
              string_type = nil
            else
              string_content << match
            end
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            unless string_content.empty?
              encoder.text_token string_content, :content
              string_content = ''
            end
            encoder.text_token match, :char
          elsif match = scan(/ \\ . /mox)
            string_content << match
            next
          elsif match = scan(/ \\ | $ /x)
            unless string_content.empty?
              encoder.text_token string_content, :content
              string_content = ''
            end
            encoder.text_token match, :error
            state = :initial
          else
            raise "else case \" reached; %p not handled." % peek(1), encoder
          end
          
        else
          raise 'else-case reached', encoder
          
        end
        
      end
      
      if state == :string
        encoder.end_group state
      end
      
      encoder
      
    end
    
  end
  
end end
 |