diff options
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/bash.rb | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/lib/coderay/scanners/bash.rb b/lib/coderay/scanners/bash.rb new file mode 100644 index 0000000..b79047e --- /dev/null +++ b/lib/coderay/scanners/bash.rb @@ -0,0 +1,291 @@ +# Scanner for Bash +# Author: Petr Kovar <pejuko@gmail.com> + +module CodeRay module Scanners + + class Bash < Scanner + + register_for :bash + file_extension 'sh' + title 'bash script' + + RESERVED_WORDS = %w( + ! [[ ]] case do done elif else esac fi for function if in select then time until while { } + ) + + COMMANDS = %w( + : . break cd continue eval exec exit export getopts hash pwd + readonly return shift test [ ] times trap umask unset + ) + + BASH_COMMANDS = %w( + alias bind builtin caller command declare echo enable help let + local logout printf read set shopt source type typeset ulimit unalias + ) + + PROGRAMS = %w( + awk bash bunzip2 bzcat bzip2 cat chgrp chmod chown cp cut date dd df dir dmesg du ed egrep + false fgrep findmnt fusermount gawk grep groups gunzip gzip hostname install keyctl kill less + ln loadkeys login ls lsblk lsinitcpio lsmod mbchk mkdir mkfifo mknod more mount mountpoint mv + netstat pidof ping ping6 ps pwd readlink red rm rmdir sed sh shred sleep stty su sudo sync tar + touch tput tr traceroute traceroute6 true umount uname uncompress vdir zcat + ) + + VARIABLES = %w( + CDPATH HOME IFS MAIL MAILPATH OPTARG OPTIND PATH PS1 PS2 + ) + + BASH_VARIABLES = %w( + BASH BASH_ARGC BASH_ARGV BASH_COMMAND BASH_ENV BASH_EXECUTION_STRING + BASH_LINENO BASH_REMATCH BASH_SOURCE BASH_SUBSHELL BASH_VERSINFO + BASH_VERSINFO[0] BASH_VERSINFO[1] BASH_VERSINFO[2] BASH_VERSINFO[3] + BASH_VERSINFO[4] BASH_VERSINFO[5] BASH_VERSION COLUMNS COMP_CWORD + COMP_LINE COMP_POINT COMP_WORDBREAKS COMP_WORDS COMPREPLAY DIRSTACK + EMACS EUID FCEDIT FIGNORE FUNCNAME GLOBIGNORE GROUPS histchars HISTCMD + HISTCONTROL HISTFILE HISTFILESIZE HISTIGNORE HISTSIZE HISTTIMEFORMAT + HOSTFILE HOSTNAME HOSTTYPE IGNOREEOF INPUTRC LANG LC_ALL LC_COLLATE + LC_CTYPE LC_MESSAGE LC_NUMERIC LINENNO LINES MACHTYPE MAILCHECK OLDPWD + OPTERR OSTYPE PIPESTATUS POSIXLY_CORRECT PPID PROMPT_COMMAND PS3 PS4 PWD + RANDOM REPLAY SECONDS SHELL SHELLOPTS SHLVL TIMEFORMAT TMOUT TMPDIR UID + ) + + PRE_CONSTANTS = / \$\{? (?: \# | \? | \d | \* | @ | - | \$ | \! | _ ) \}? /ox + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(COMMANDS, :method). + add(BASH_COMMANDS, :method). +# add(PROGRAMS, :method). + add(VARIABLES, :predefined). + add(BASH_VARIABLES, :predefined) + + attr_reader :state, :quote + + def initialize(*args) + super(*args) + @state = :initial + @quote = nil + @shell = false + @brace_shell = 0 + @quote_brace_shell = 0 + end + + def scan_tokens encoder, options + + until eos? + kind = match = nil + + if match = scan(/\n/) + encoder.text_token(match, :space) + next + end + + if @state == :initial + if match = scan(/\A#!.*/) + kind = :directive + elsif match = scan(/\s*#.*/) + kind = :comment + elsif match = scan(/[^"]#/) + kind = :ident + elsif match = scan(/\.\.+/) + kind = :plain + elsif match = scan(/(?:\.|source)\s+/) + kind = :reserved + elsif match = scan(/(?:\\.|,)/) + kind = :plain + elsif match = scan(/;/) + kind = :delimiter + elsif match = scan(/"/) + @state = :quote + @quote = match + encoder.begin_group :string + encoder.text_token(match, :delimiter) + next + elsif match = scan(/<<\S+/) + @state = :quote + match =~ /<<(\S+)/ + @quote = "#{$1}" + encoder.begin_group :string + encoder.text_token(match, :delimiter) + next + elsif match = scan(/`/) + if @shell + encoder.text_token(match, :delimiter) + encoder.end_group :shell + else + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + end + @shell = (not @shell) + next + elsif match = scan(/'[^']*'?/) + kind = :string + elsif match = scan(/(?: \& | > | < | \| >> | << | >\& )/ox) + kind = :binary + elsif match = scan(/\d+[\.-](?:\d+[\.-]?)+/) + #versions, dates, and hyphen delimited numbers + kind = :float + elsif match = scan(/\d+\.\d+\s+/) + kind = :float + elsif match = scan(/\d+/) + kind = :integer + elsif match = scan(/ (?: \$\(\( | \)\) ) /x) + kind = :global_variable + elsif match = scan(/ \$\{ [^\}]+ \} /ox) + match =~ /\$\{(.*)\}/ + var=$1 + if var =~ /\[.*\]/ + encoder.text_token("${", :instance_variable) + match_array(var, encoder) + encoder.text_token("}", :instance_variable) + next + end + kind = IDENT_KIND[var] + kind = :instance_variable if kind == :ident + #elsif match = scan(/ \$\( [^\)]+ \) /ox) + elsif match = scan(/ \$\( /ox) + @brace_shell += 1 + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + next + elsif @brace_shell > 0 && match = scan(/ \) /ox) + encoder.text_token(match, :delimiter) + encoder.end_group :shell + @brace_shell -= 1 + next + elsif match = scan(PRE_CONSTANTS) + kind = :predefined_constant + elsif match = scan(/[^\s'"]*[A-Za-z_][A-Za-z_0-9]*\+?=/) + match =~ /(.*?)([A-Za-z_][A-Za-z_0-9]*)(\+?=)/ + str = $1 + pre = $2 + op = $3 + kind = :plain + if str.to_s.strip.empty? + kind = IDENT_KIND[pre] + kind = :instance_variable if kind == :ident + encoder.text_token(pre, kind) + encoder.text_token(op, :operator) + next + end + elsif match = scan(/[A-Za-z_]+\[[A-Za-z_\@\*\d]+\]/) + # array + match_array(match, encoder) + next + elsif match = scan(/ \$[A-Za-z_][A-Za-z_0-9]* /ox) + match =~ /\$(.*)/ + kind = IDENT_KIND[$1] + kind = :instance_variable if kind == :ident + elsif match = scan(/read \S+/) + match =~ /read(\s+)(\S+)/ + encoder.text_token('read', :method) + encoder.text_token($1, :space) + encoder.text_token($2, :instance_variable) + next + elsif match = scan(/[\!\:\[\]\{\}]/) + kind = :reserved + elsif match = scan(/ [A-Za-z_][A-Za-z_\d]*;? /x) + match =~ /([^;]+);?/ + kind = IDENT_KIND[$1] + if match[/([^;]+);$/] + encoder.text_token($1, kind) + encoder.text_token(';', :delimiter) + next + end + elsif match = scan(/(?: = | - | \+ | \{ | \} | \( | \) | && | \|\| | ;; | ! )/ox) + kind = :operator + elsif match = scan(/\s+/) + kind = :space + elsif match = scan(/[^ \$"'`\d]/) + kind = :plain + elsif match = scan(/.+/) + # this shouldn't be :reserved for highlighting bad matches + match, kind = handle_error(match, options) + end + elsif @state == :quote + if (match = scan(/\\.?/)) + kind = :content + elsif match = scan(/#{@quote}/) + encoder.text_token(match, :delimiter) + encoder.end_group :string + @quote = nil + @state = :initial + next + #kind = :symbol + elsif match = scan(PRE_CONSTANTS) + kind = :predefined_constant + elsif match = scan(/ (?: \$\(\(.*?\)\) ) /x) + kind = :global_variable + elsif match = scan(/ \$\( /ox) + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + @quote_brace_shell += 1 + next + elsif match = scan(/\)/) + if @quote_brace_shell > 0 + encoder.text_token(match, :delimiter) + encoder.end_group :shell + @quote_brace_shell -= 1 + next + else + kind = :content + end + elsif match = scan(/ \$ (?: (?: \{ [^\}]* \}) | (?: [A-Za-z_0-9]+ ) ) /x) + match =~ /(\$\{?)([^\}]*)(\}?)/ + pre=$1 + var=$2 + post=$3 + if var =~ /\[.*?\]/ + encoder.text_token(pre,:instance_variable) + match_array(var, encoder) + encoder.text_token(post,:instance_variable) + next + end + kind = IDENT_KIND[match] + kind = :instance_variable if kind == :ident + elsif match = scan(/[^\)\$#{@quote}\\]+/) + kind = :content + else match = scan(/.+/) + # this shouldn't be + #kind = :reserved + #raise match + match, kind = handle_error(match, options) + end + end + + match ||= matched + encoder.text_token(match, kind) + end + + if @state == :quote + encoder.end_group :string + end + + encoder + end + + + def match_array(match, encoder) + match =~ /(.+)\[(.*?)\]/ + var = $1 + key = $2 + kind = IDENT_KIND[var] + kind = :instance_variable if kind == :ident + encoder.text_token(var, kind) + encoder.text_token("[", :operator) + encoder.text_token(key, :key) + encoder.text_token("]", :operator) + end + + def handle_error(match, options) + o = {:ignore_errors => true}.merge(options) + if o[:ignore_errors] + [match, :plain] + else + [">>>>>#{match}<<<<<", :error] + end + end + + end +end +end |