summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/bash.rb
blob: b79047e28793f27a36f99f74201741e29c58f0d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# Scanner for Bash
# Author: Petr Kovar <pejuko@gmail.com>

module CodeRay module Scanners

  class Bash < Scanner

    register_for :bash
    file_extension 'sh'
    title 'bash script'

    # Shell keywords and grouping words.
    RESERVED_WORDS = %w(
      ! [[ ]] case do done elif else esac fi for function if in select then time until while { }
    )

    # Built-in commands shared with the POSIX shell.
    COMMANDS = %w(
      : . break cd continue eval exec exit export getopts hash pwd
      readonly return shift test [ ] times trap umask unset
    )

    # Built-in commands specific to Bash.
    BASH_COMMANDS = %w(
      alias bind builtin caller command declare echo enable help let
      local logout printf read set shopt source type typeset ulimit unalias
    )

    # Common external programs. This list is not registered in IDENT_KIND at
    # the moment (the corresponding +add+ call there is commented out).
    PROGRAMS = %w(
      awk bash bunzip2 bzcat bzip2 cat chgrp chmod chown cp cut date dd df dir dmesg du ed egrep
      false fgrep findmnt fusermount gawk grep groups gunzip gzip hostname install keyctl kill less
      ln loadkeys login ls lsblk lsinitcpio lsmod mbchk mkdir mkfifo mknod more mount mountpoint mv
      netstat pidof ping ping6 ps pwd readlink red rm rmdir sed sh shred sleep stty su sudo sync tar
      touch  tput tr traceroute traceroute6 true umount uname uncompress vdir zcat
    )

    # Variables set or used by any Bourne-style shell.
    VARIABLES = %w(
      CDPATH HOME IFS MAIL MAILPATH OPTARG OPTIND PATH PS1 PS2
    )

    # Variables set or used by Bash itself.
    # Spelling follows the Bash manual: COMPREPLY, LC_MESSAGES, LINENO and
    # REPLY were previously misspelled and therefore never highlighted.
    BASH_VARIABLES = %w(
      BASH BASH_ARGC BASH_ARGV BASH_COMMAND BASH_ENV BASH_EXECUTION_STRING
      BASH_LINENO BASH_REMATCH BASH_SOURCE BASH_SUBSHELL BASH_VERSINFO
      BASH_VERSINFO[0] BASH_VERSINFO[1] BASH_VERSINFO[2] BASH_VERSINFO[3]
      BASH_VERSINFO[4] BASH_VERSINFO[5] BASH_VERSION COLUMNS COMP_CWORD
      COMP_LINE COMP_POINT COMP_WORDBREAKS COMP_WORDS COMPREPLY DIRSTACK
      EMACS EUID FCEDIT FIGNORE FUNCNAME GLOBIGNORE GROUPS histchars HISTCMD
      HISTCONTROL HISTFILE HISTFILESIZE HISTIGNORE HISTSIZE HISTTIMEFORMAT
      HOSTFILE HOSTNAME HOSTTYPE IGNOREEOF INPUTRC LANG LC_ALL LC_COLLATE
      LC_CTYPE LC_MESSAGES LC_NUMERIC LINENO LINES MACHTYPE MAILCHECK OLDPWD
      OPTERR OSTYPE PIPESTATUS POSIXLY_CORRECT PPID PROMPT_COMMAND PS3 PS4 PWD
      RANDOM REPLY SECONDS SHELL SHELLOPTS SHLVL TIMEFORMAT TMOUT TMPDIR UID
    )

    # Special parameters ($#, $?, $0-$9, $*, $@, $-, $$, $!, $_), with or
    # without surrounding braces.
    PRE_CONSTANTS = / \$\{? (?: \# | \? | \d | \* | @ | - | \$ | \! | _ ) \}? /x

    # Lookup table from a word to the token kind it is highlighted with.
    # The table is created with :ident as its argument and the scanner treats
    # an :ident result as "not a known word" (presumably the WordList default;
    # verify against WordList).
    IDENT_KIND = [
      [RESERVED_WORDS, :reserved],
      [COMMANDS,       :method],
      [BASH_COMMANDS,  :method],
      # [PROGRAMS,     :method],   # currently disabled
      [VARIABLES,      :predefined],
      [BASH_VARIABLES, :predefined]
    ].inject(WordList.new(:ident)) { |list, (words, kind)| list.add(words, kind) }

    # Current scanner state and the terminator of the string being scanned.
    attr_reader :state, :quote

    # Creates the scanner and puts its state machine into the starting state.
    #
    # args:: passed through unchanged to the superclass constructor.
    def initialize(*args)
      super(*args)

      # Not inside any string, so there is no terminator to look for.
      @state, @quote = :initial, nil

      # No backtick command substitution is open.
      @shell = false

      # Number of open "$(" substitutions outside and inside quotes.
      @brace_shell = @quote_brace_shell = 0
    end

    # Tokenizes the input as Bash source and reports every token to +encoder+.
    #
    # The method is a small state machine:
    # * <tt>:initial</tt> - ordinary shell code;
    # * <tt>:quote</tt>   - inside a double-quoted string or a here-document;
    #   <tt>@quote</tt> holds the text that ends it.
    #
    # Command substitutions are reported as <tt>:shell</tt> groups and strings
    # as <tt>:string</tt> groups. <tt>@shell</tt> tracks an open backtick,
    # <tt>@brace_shell</tt> / <tt>@quote_brace_shell</tt> count open
    # <tt>$(</tt> outside / inside a string.
    #
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
    # options:: scanner options; only handed on to +handle_error+.
    #
    # Returns +encoder+.
    def scan_tokens(encoder, options)

      until eos?
        kind = match = nil

        # Line breaks are reported one at a time, whatever the state is.
        if match = scan(/\n/)
          encoder.text_token(match, :space)
          next
        end

        if @state == :initial
          if match = scan(/\A#!.*/)
            # interpreter line
            kind = :directive
          elsif match = scan(/\s*#.*/)
            kind = :comment
          elsif match = scan(/[^"]#/)
            # a '#' glued to the previous character does not start a comment
            kind = :ident
          elsif match = scan(/\.\.+/)
            kind = :plain
          elsif match = scan(/(?:\.|source)\s+/)
            kind = :reserved
          elsif match = scan(/(?:\\.|,)/)
            kind = :plain
          elsif match = scan(/;/)
            kind = :delimiter
          elsif match = scan(/"/)
            @state = :quote
            @quote = match
            encoder.begin_group :string
            encoder.text_token(match, :delimiter)
            next
          elsif match = scan(/<<(?!<)\S+/)
            # Here-document (but not the "<<<" here-string). The closing line
            # carries neither the "-" flag nor the quoting of the opening
            # word, so both are removed before the terminator is remembered.
            @state = :quote
            @quote = match[2..-1].sub(/\A-/, '').gsub(/['"\\]/, '')
            encoder.begin_group :string
            encoder.text_token(match, :delimiter)
            next
          elsif match = scan(/`/)
            # A backtick either opens or closes a command substitution.
            if @shell
              encoder.text_token(match, :delimiter)
              encoder.end_group :shell
            else
              encoder.begin_group :shell
              encoder.text_token(match, :delimiter)
            end
            @shell = !@shell
            next
          elsif match = scan(/'[^']*'?/)
            kind = :string
          elsif match = scan(/(?: >> | << | >\& | \&(?!\&) | \|(?!\|) | > | < )/x)
            # Redirections and pipes. Two-character forms are tried first, and
            # "&&" / "||" are left for the operator rule further down.
            kind = :binary
          elsif match = scan(/\d+[\.-](?:\d+[\.-]?)+/)
            # versions, dates, and hyphen delimited numbers
            kind = :float
          elsif match = scan(/\d+/)
            kind = :integer
          elsif match = scan(/ (?: \$\(\( | \)\) ) /x)
            kind = :global_variable
          elsif match = scan(/ \$\{ [^\}]+ \} /x)
            var = match[2..-2]
            if var =~ /\[.*\]/
              encoder.text_token("${", :instance_variable)
              match_array(var, encoder)
              encoder.text_token("}", :instance_variable)
              next
            end
            kind = IDENT_KIND[var]
            kind = :instance_variable if kind == :ident
          elsif match = scan(/ \$\( /x)
            @brace_shell += 1
            encoder.begin_group :shell
            encoder.text_token(match, :delimiter)
            next
          elsif @brace_shell > 0 && (match = scan(/ \) /x))
            encoder.text_token(match, :delimiter)
            encoder.end_group :shell
            @brace_shell -= 1
            next
          elsif match = scan(PRE_CONSTANTS)
            kind = :predefined_constant
          elsif match = scan(/[^\s'"]*?[A-Za-z_][A-Za-z_0-9]*\+?=/)
            # Assignment. The prefix is matched lazily so that only the first
            # "name=" of a word such as FOO=--bar=baz is consumed here; the
            # rest is scanned by later iterations instead of being dropped.
            match =~ /\A(.*?)([A-Za-z_][A-Za-z_0-9]*)(\+?=)\z/
            prefix = $1
            name = $2
            op = $3
            kind = :plain
            if prefix.to_s.empty?
              kind = IDENT_KIND[name]
              kind = :instance_variable if kind == :ident
              encoder.text_token(name, kind)
              encoder.text_token(op, :operator)
              next
            end
            # With a prefix (e.g. "--prefix=") the whole text stays :plain.
          elsif match = scan(/[A-Za-z_]+\[[A-Za-z_\@\*\d]+\]/)
            # array
            match_array(match, encoder)
            next
          elsif match = scan(/ \$[A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match[1..-1]]
            kind = :instance_variable if kind == :ident
          elsif match = scan(/read \S+/)
            # "read" followed by exactly one space and its first argument
            encoder.text_token('read', :method)
            encoder.text_token(' ', :space)
            encoder.text_token(match[5..-1], :instance_variable)
            next
          elsif match = scan(/[\!\:\[\]\{\}]/)
            kind = :reserved
          elsif match = scan(/ [A-Za-z_][A-Za-z_\d]*;? /x)
            word = match.chomp(';')
            kind = IDENT_KIND[word]
            if match.end_with?(';')
              # report the word and the terminating ';' separately
              encoder.text_token(word, kind)
              encoder.text_token(';', :delimiter)
              next
            end
          elsif match = scan(/(?: = | - | \+ | \{ | \} | \( | \) | && | \|\| | ;; | ! )/x)
            kind = :operator
          elsif match = scan(/\s+/)
            kind = :space
          elsif match = scan(/[^ \$"'`\d]/)
            kind = :plain
          elsif match = scan(/.+/)
            # this shouldn't be :reserved for highlighting bad matches
            match, kind = handle_error(match, options)
          end
        elsif @state == :quote
          # The terminator is literal text, never a pattern.
          terminator = Regexp.escape(@quote.to_s)

          if match = scan(/\\.?/)
            kind = :content
          elsif match = scan(/#{terminator}/)
            encoder.text_token(match, :delimiter)
            encoder.end_group :string
            @quote = nil
            @state = :initial
            next
          elsif match = scan(PRE_CONSTANTS)
            kind = :predefined_constant
          elsif match = scan(/ (?: \$\(\(.*?\)\) ) /x)
            kind = :global_variable
          elsif match = scan(/ \$\( /x)
            encoder.begin_group :shell
            encoder.text_token(match, :delimiter)
            @quote_brace_shell += 1
            next
          elsif match = scan(/\)/)
            if @quote_brace_shell > 0
              encoder.text_token(match, :delimiter)
              encoder.end_group :shell
              @quote_brace_shell -= 1
              next
            else
              kind = :content
            end
          elsif match = scan(/ \$ (?: (?: \{ [^\}]* \}) | (?: [A-Za-z_0-9]+ ) ) /x)
            match =~ /(\$\{?)([^\}]*)(\}?)/
            pre = $1
            var = $2
            post = $3
            if var =~ /\[.*?\]/
              encoder.text_token(pre, :instance_variable)
              match_array(var, encoder)
              encoder.text_token(post, :instance_variable)
              next
            end
            # Look the bare name up, as the :initial state does, so that
            # predefined variables are recognised inside strings too.
            kind = IDENT_KIND[var]
            kind = :instance_variable if kind == :ident
          elsif match = scan(/\$/)
            # A '$' that starts no expansion is ordinary string content.
            kind = :content
          elsif match = scan(/(?:(?!#{terminator})[^\)\$\\])+/)
            # Plain text up to the next special character or the terminator.
            kind = :content
          else
            # this shouldn't be
            match = scan(/.+/)
            match, kind = handle_error(match, options)
          end
        end

        match ||= matched
        encoder.text_token(match, kind)
      end

      # Close a string that is still open at the end of the input.
      if @state == :quote
        encoder.end_group :string
      end

      encoder
    end
  

    # Reports the tokens of an array reference such as <tt>name[key]</tt>.
    #
    # The name is highlighted according to IDENT_KIND (unknown names become
    # :instance_variable), the brackets as :operator and the subscript as
    # :key. Any text before or after the reference, e.g. the ":-x" of
    # "arr[i]:-x", is reported as :plain so that no input is lost.
    #
    # match::   text containing the array reference.
    # encoder:: receives the +text_token+ calls.
    def match_array(match, encoder)
      parts = /(.+)\[(.*?)\]/.match(match)

      if parts.nil?
        # No subscript at all: report the text unchanged as a variable name.
        kind = IDENT_KIND[match]
        kind = :instance_variable if kind == :ident
        encoder.text_token(match, kind)
        return
      end

      leading = parts.pre_match
      encoder.text_token(leading, :plain) unless leading.empty?

      var = parts[1]
      kind = IDENT_KIND[var]
      kind = :instance_variable if kind == :ident
      encoder.text_token(var, kind)
      encoder.text_token("[", :operator)
      encoder.text_token(parts[2], :key)
      encoder.text_token("]", :operator)

      trailing = parts.post_match
      encoder.text_token(trailing, :plain) unless trailing.empty?
    end
  
    # Decides how text that no scanner rule recognised is reported.
    #
    # match::   the unrecognised text.
    # options:: option hash; <tt>:ignore_errors</tt> defaults to +true+ when
    #           the key is absent.
    #
    # Returns a <tt>[text, kind]</tt> pair: the text unchanged as :plain when
    # errors are ignored, otherwise the text wrapped in ">>>>>" / "<<<<<"
    # markers as :error.
    def handle_error(match, options)
      settings = { :ignore_errors => true }.merge(options)
      return [match, :plain] if settings[:ignore_errors]

      [">>>>>#{match}<<<<<", :error]
    end

  end
end
end