diff options
author | murphy <murphy@rubychan.de> | 2009-04-20 21:19:41 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2009-04-20 21:19:41 +0000 |
commit | 32701fcf0c5f0e54d7172ceb86e44664aa4bae10 (patch) | |
tree | 5185f2c879f0478a89b2dcbc2a0a632ddaf838b9 /lib/coderay | |
parent | cf0b7a2a80808c06cb51ad2ee8971082d96cd41e (diff) | |
download | coderay-32701fcf0c5f0e54d7172ceb86e44664aa4bae10.tar.gz |
New: *PHP and SQL Scanners*
* Both not well tested yet, preview versions.
* Some example code for both languages.
* PHP scanner original by Stefan Walk.
* SQL scanner using code by Keith Pitt and Josh Goebel.
Diffstat (limited to 'lib/coderay')
-rw-r--r-- | lib/coderay/helpers/file_type.rb | 5 | ||||
-rw-r--r-- | lib/coderay/scanners/_map.rb | 12 | ||||
-rw-r--r-- | lib/coderay/scanners/php.rb | 284 | ||||
-rw-r--r-- | lib/coderay/scanners/sql.rb | 159 |
4 files changed, 456 insertions, 4 deletions
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 7f472d6..3b57608 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -96,6 +96,10 @@ module FileType
     'json' => :json,
     'mab' => :ruby,
     'patch' => :diff,
+    'php' => :php,
+    'php3' => :php,
+    'php4' => :php,
+    'php5' => :php,
     'py' => :python,
     'py3' => :python,
     'pyw' => :python,
@@ -105,6 +109,7 @@ module FileType
     'rbw' => :ruby,
     'rhtml' => :rhtml,
     'sch' => :scheme,
+    'sql' => :sql,
     'ss' => :scheme,
     'xhtml' => :xhtml,
     'xml' => :xml,
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index c9ac75f..9f08d7d 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -1,14 +1,18 @@
 module CodeRay
 module Scanners
 
-  map :cpp => :c,
-    :plain => :plaintext,
-    :pascal => :delphi,
+  map \
+    :cpp => :c,
+    :ecma => :java_script,
+    :ecmascript => :java_script,
+    :ecma_script => :java_script,
     :irb => :ruby,
-    :xhtml => :nitro_xhtml,
     :javascript => :java_script,
     :js => :java_script,
     :nitro => :nitro_xhtml,
+    :pascal => :delphi,
+    :plain => :plaintext,
+    :xhtml => :html,
     :yml => :yaml
 
   default :plain
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
new file mode 100644
index 0000000..0912ea5
--- /dev/null
+++ b/lib/coderay/scanners/php.rb
@@ -0,0 +1,284 @@
+class Regexp  # NOTE(review): extends a core class globally; the RE module below uses both operators
+  def |(other)  # alternation of two patterns
+    Regexp.union(self, other)
+  end
+  def +(other)  # concatenation of two patterns
+    /#{self}#{other}/
+  end
+end
+module CodeRay
+module Scanners
+
+  load :html
+
+  # TODO: Complete rewrite. This scanner is buggy.
+  class PHP < Scanner  # scans PHP; text outside the PHP tags is handed to the HTML scanner
+
+    register_for :php
+    file_extension 'php'
+
+    def setup  # creates the nested HTML scanner, which appends to this scanner's @tokens
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    def reset_instance  # resets the nested HTML scanner together with this one
+      super
+      @html_scanner.reset
+    end
+
+    module Words  # keyword lists and the table mapping identifiers to token kinds
+      ControlKeywords = %w!
+        if else elseif while do for switch case default declare foreach as
+        endif endwhile endfor endforeach endswitch enddeclare return break
+        continue exit die try catch throw
+      !
+      OtherKeywords = %w!
+        function class extends implements instanceof parent self var const
+        private public protected static abstract final global new echo include
+        require include_once require_once eval print use unset isset empty
+        interface list array clone null true false
+      !
+
+      SpecialConstants = %w! __LINE__ __FILE__ __CLASS__
+        __METHOD__ __FUNCTION__
+      !
+      IdentKinds = WordList.new(:ident).
+        add(ControlKeywords, :reserved).
+        add(OtherKeywords, :pre_type).
+        add(SpecialConstants, :pre_constant)
+    end
+    module RE  # regular expressions used by scan_tokens
+      def self.build_alternatives(array)  # case-insensitive alternation of the escaped literal strings
+        Regexp.new(array.map { |s| Regexp.escape(s) }.join('|') , Regexp::IGNORECASE)
+      end
+
+      PHPStart = /
+        <script\s+language="php"> |  # x mode drops literal blanks, so match the whitespace explicitly
+        <script\s+language='php'> |
+        <\?php     |
+        <\?(?!xml) |
+        <%
+      /xi
+
+      PHPEnd = %r!
+        </script> |
+        \?>       |
+        %>
+      !xi
+
+      IChar = /[a-z0-9_\x80-\xFF]/i
+      IStart = /[a-z_\x80-\xFF]/i
+      Identifier = /#{IStart}#{IChar}*/
+      Variable = /\$#{Identifier}/
+
+      Typecasts = build_alternatives %w!
+        float double real int integer bool boolean string array object null
+      !.map{|s| "(#{s})"}
+      OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
+      OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
+      OneLineComment = OneLineComment1 | OneLineComment2
+
+      HereDoc = /<<</ + Identifier
+
+      binops = %w!
+        + - * / << >> & | ^ . %
+      !
+
+      ComparisionOperator = build_alternatives %w$
+        === !== == != <= >=
+      $
+      IncDecOperator = build_alternatives %w! ++ -- !
+
+      BinaryOperator = build_alternatives binops
+      AssignOperator = build_alternatives binops.map {|s| "#{s}=" }  # Ruby interpolation; "${s}=" was taken literally
+      LogicalOperator = build_alternatives %w! and or xor not !
+      ObjectOperator = build_alternatives %w! -> :: !
+      OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$
+
+      Operator = ComparisionOperator | IncDecOperator | LogicalOperator |
+        ObjectOperator | AssignOperator | BinaryOperator | OtherOperator
+
+
+      S = /\s+/
+
+      Integer = /-?0x[0-9a-fA-F]+/ | /-?\d+/  # hex literals may have more than one digit
+      Float = /-?(?:\d+\.\d*|\d*\.\d+)(?:[eE][+-]?\d+)?/  # exponent sign is optional, "e" may be upper case
+
+    end
+
+    def scan_tokens tokens, options  # appends [text, kind] pairs to tokens and returns tokens
+      states = [:php]  # state stack; scanning starts in PHP mode
+      heredocdelim = nil  # recorded below but not used yet (the heredoc state is disabled)
+
+      until eos?
+
+        match = nil
+        kind = nil
+
+        case states.last
+        when :html
+          if scan RE::PHPStart
+            kind = :delimiter
+            states.pop
+          else
+            match = scan_until(/(?=#{RE::PHPStart})/o) || scan_until(/\z/)  # up to the next PHP start tag, or to the end
+            @html_scanner.tokenize match if not match.empty?
+            kind = :space
+            match = ''  # the text was emitted by the HTML scanner; only an empty token is added here
+          end
+
+        when :php
+          if scan RE::PHPEnd
+            kind = :delimiter
+            states.push :html
+
+          elsif scan RE::S
+            kind = :space
+
+          elsif scan(/\/\*/)
+            kind = :comment
+            states.push :mlcomment
+
+          elsif scan RE::OneLineComment
+            kind = :comment
+
+          elsif match = scan(RE::Identifier)
+            kind = Words::IdentKinds[match]
+            if kind == :ident && check(/:(?!:)/) #&& tokens[-2][0] == 'case'
+#              match << scan(/:/)
+              kind = :label
+            elsif kind == :ident and match =~ /^[A-Z]/
+              kind = :constant
+            end
+
+          elsif scan RE::Float  # tried before Integer, otherwise "1.5" is split into "1" and ".5"
+            kind = :float
+
+          elsif scan RE::Integer
+            kind = :integer
+
+          elsif scan(/'/)
+            kind = :delimiter
+            states.push :sqstring
+
+          elsif scan(/"/)
+            kind = :delimiter
+            states.push :dqstring
+
+          elsif match = scan(RE::HereDoc)
+            heredocdelim = match[RE::Identifier]
+            kind = :delimiter
+            # states.push :heredocstring
+
+          elsif scan RE::Variable
+            kind = :local_variable
+
+          elsif scan(/\{/)
+            kind = :operator
+            states.push :php  # nested block; the matching "}" pops it again
+
+          elsif scan(/\}/)
+            if states.length == 1  # no open block left to close
+              kind = :error
+            else
+              kind = :operator
+              states.pop
+            end
+
+          elsif scan RE::Operator
+            kind = :operator
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        when :mlcomment
+          if scan(/(?:[^\n\r\f*]|\*(?!\/))+/)
+            kind = :comment
+
+          elsif scan(/\*\//)
+            kind = :comment
+            states.pop
+
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          end
+
+        when :sqstring
+          if scan(/[^\r\n\f'\\]+/)
+            kind = :string
+          elsif match = scan(/\\\\|\\'/)
+            kind = :char
+          elsif scan(/\\/)
+            kind = :string
+          elsif scan(/[\r\n\f ]+/)
+            kind = :space
+          elsif scan(/'/)
+            kind = :delimiter
+            states.pop
+          end
+
+        when :dqstring
+#todo: $foo[bar] kind of stuff
+          if scan(/[^\r\n\f"${\\]+/)
+            kind = :string
+          elsif scan(/\\x[0-9a-fA-F]{1,2}/)  # hex escape: one or two hex digits
+            kind = :char
+          elsif scan(/\\[0-7]{1,3}/)  # octal escape: one to three octal digits
+            kind = :char
+          elsif scan(/\\["\\abcfnrtyv]/)
+            kind = :char
+          elsif scan(/\\/)
+            kind = :string
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          elsif match = scan(/#{RE::Variable}/o)
+            kind = :local_variable
+            if check(/\[#{RE::Identifier}\]/o)
+              match << scan(/\[#{RE::Identifier}\]/o)
+            elsif check(/\[/)
+              match << scan(/\[#{RE::Identifier}?/o)
+              kind = :error
+            elsif check(/->#{RE::Identifier}/o)
+              match << scan(/->#{RE::Identifier}/o)
+            end
+          elsif scan(/\{/)
+            if check(/\$/)  # "{$" opens an embedded PHP expression
+              kind = :operator
+              states.push :php
+            else
+              kind = :string
+            end
+            match = '{'
+          elsif scan(/\$\{#{RE::Identifier}\}/o)
+            kind = :local_variable
+          elsif scan(/\$/)
+            kind = :string
+          elsif scan(/"/)
+            kind = :delimiter
+            states.pop
+          end
+        else
+          raise_inspect 'Unknown state!', tokens, states
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens, states
+        end
+        raise_inspect 'Empty token', tokens, states unless match
+
+        tokens << [match, kind]
+
+      end
+      tokens
+
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
new file mode 100644
index 0000000..9ba0b4d
--- /dev/null
+++ b/lib/coderay/scanners/sql.rb
@@ -0,0 +1,159 @@
+module CodeRay module Scanners
+
+  # by Josh Goebel
+  class SQL < Scanner  # tokenizes SQL: keywords, types, numbers, comments and quoted strings
+
+    register_for :sql
+
+    RESERVED_WORDS = %w(
+      create table index trigger drop primary key set select
+      insert update delete replace into
+      on from values before and or if exists case when
+      then else as group order by avg where
+      join inner outer union engine not
+      like end using collate show columns begin
+    )
+
+    PREDEFINED_TYPES = %w(
+      char varchar enum binary text tinytext mediumtext
+      longtext blob tinyblob mediumblob longblob timestamp
+      date time datetime year double decimal float int
+      integer tinyint mediumint bigint smallint unsigned bit
+      bool boolean hex bin oct
+    )
+
+    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )
+
+    DIRECTIVES = %w( auto_increment unique default charset )
+
+    PREDEFINED_CONSTANTS = %w( null true false )
+
+    IDENT_KIND = CaseIgnoringWordList.new(:ident).
+      add(RESERVED_WORDS, :reserved).
+      add(PREDEFINED_TYPES, :pre_type).
+      add(PREDEFINED_CONSTANTS, :pre_constant).
+      add(PREDEFINED_FUNCTIONS, :predefined).
+      add(DIRECTIVES, :directive)
+
+    ESCAPE = / [rbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
+    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+    STRING_PREFIXES = /[xnb]|_\w+/i  # optional modifier directly before an opening quote
+
+    def scan_tokens tokens, options  # appends [text, kind] pairs to tokens and returns tokens
+
+      state = :initial
+      string_type = nil  # the quote character that opened the current string
+      string_content = ''  # buffers string text until an escape or the closing quote
+
+      until eos?
+
+        kind = nil
+        match = nil
+
+        if state == :initial
+
+          if scan(/ \s+ | \\\n /x)
+            kind = :space
+
+          elsif scan(/^(?:--\s?|#).*/)
+            kind = :comment
+
+          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
+            kind = :comment
+
+          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
+            kind = :operator
+
+          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
+            prefix = self[1]
+            string_type = self[2]
+            tokens << [:open, :string]
+            tokens << [prefix, :modifier] if prefix
+            match = string_type
+            state = :string
+            kind = :delimiter
+
+          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
+            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]
+
+          elsif scan(/0[xX][0-9A-Fa-f]+/)
+            kind = :hex
+
+          elsif scan(/0[0-7]+(?![89.eEfF])/)
+            kind = :oct
+
+          elsif scan(/(?>\d+)(?![.eEfF])/)
+            kind = :integer
+
+          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+            kind = :float
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        elsif state == :string
+          if match = scan(/[^\\"'`]+/)
+            string_content << match
+            next  # text is only buffered; no token is emitted yet
+          elsif match = scan(/["'`]/)
+            if string_type == match
+              if peek(1) == string_type  # doubling means escape
+                string_content << string_type << getch
+                next
+              end
+              unless string_content.empty?
+                tokens << [string_content, :content]
+                string_content = ''
+              end
+              tokens << [matched, :delimiter]
+              tokens << [:close, :string]
+              state = :initial
+              string_type = nil
+              next
+            else
+              string_content << match  # a different quote character is ordinary content
+            end
+            next
+          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+            unless string_content.empty?
+              tokens << [string_content, :content]
+              string_content = ''
+            end
+            kind = :char
+          elsif match = scan(/ \\ . /mox)
+            string_content << match
+            next
+          elsif scan(/ \\ | $ /x)
+            unless string_content.empty?
+              tokens << [string_content, :content]
+              string_content = ''
+            end
+            kind = :error
+            state = :initial
+          else
+            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state  # plain raise rejects the token list as its second argument
+          end
+
+        else
+          raise_inspect 'else-case reached', tokens, state  # same reporting helper the PHP scanner uses
+
+        end
+
+        match ||= matched
+#        raise [match, kind], tokens if kind == :error
+
+        tokens << [match, kind]
+
+      end
+#      RAILS_DEFAULT_LOGGER.info tokens.inspect
+      tokens
+
+    end
+
+  end
+
+end end
\ No newline at end of file