summaryrefslogtreecommitdiff
path: root/lib/coderay
diff options
context:
space:
mode:
author murphy <murphy@rubychan.de> 2009-04-20 21:19:41 +0000
committer murphy <murphy@rubychan.de> 2009-04-20 21:19:41 +0000
commit 32701fcf0c5f0e54d7172ceb86e44664aa4bae10 (patch)
tree 5185f2c879f0478a89b2dcbc2a0a632ddaf838b9 /lib/coderay
parent cf0b7a2a80808c06cb51ad2ee8971082d96cd41e (diff)
download coderay-32701fcf0c5f0e54d7172ceb86e44664aa4bae10.tar.gz
New: *PHP and SQL Scanners*
* Both not well tested yet, preview versions. * Some example code for both languages. * PHP scanner original by Stefan Walk. * SQL scanner using code by Keith Pitt and Josh Goebel.
Diffstat (limited to 'lib/coderay')
-rw-r--r-- lib/coderay/helpers/file_type.rb | 5
-rw-r--r-- lib/coderay/scanners/_map.rb | 12
-rw-r--r-- lib/coderay/scanners/php.rb | 284
-rw-r--r-- lib/coderay/scanners/sql.rb | 159
4 files changed, 456 insertions, 4 deletions
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 7f472d6..3b57608 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -96,6 +96,10 @@ module FileType
'json' => :json,
'mab' => :ruby,
'patch' => :diff,
+ 'php' => :php,
+ 'php3' => :php,
+ 'php4' => :php,
+ 'php5' => :php,
'py' => :python,
'py3' => :python,
'pyw' => :python,
@@ -105,6 +109,7 @@ module FileType
'rbw' => :ruby,
'rhtml' => :rhtml,
'sch' => :scheme,
+ 'sql' => :sql,
'ss' => :scheme,
'xhtml' => :xhtml,
'xml' => :xml,
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index c9ac75f..9f08d7d 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -1,14 +1,18 @@
module CodeRay
module Scanners
- map :cpp => :c,
- :plain => :plaintext,
- :pascal => :delphi,
+ map \
+ :cpp => :c,
+ :ecma => :java_script,
+ :ecmascript => :java_script,
+ :ecma_script => :java_script,
:irb => :ruby,
- :xhtml => :nitro_xhtml,
:javascript => :java_script,
:js => :java_script,
:nitro => :nitro_xhtml,
+ :pascal => :delphi,
+ :plain => :plaintext,
+ :xhtml => :html,
:yml => :yaml
default :plain
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
new file mode 100644
index 0000000..0912ea5
--- /dev/null
+++ b/lib/coderay/scanners/php.rb
@@ -0,0 +1,284 @@
+class Regexp
+ def |(other)
+ Regexp.union(self, other)
+ end
+ def +(other)
+ /#{self}#{other}/
+ end
+end
+module CodeRay
+module Scanners
+
+ load :html
+
+ # TODO: Complete rewrite. This scanner is buggy.
+ class PHP < Scanner
+
+ register_for :php
+ file_extension 'php'
+
+ def setup
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+ end
+
+ # Resets this scanner through the superclass implementation, then resets
+ # the nested HTML scanner as well so both start over together.
+ def reset_instance
+ super
+ @html_scanner.reset
+ end
+
+ module Words
+ ControlKeywords = %w!
+ if else elseif while do for switch case default declare foreach as
+ endif endwhile endfor endforeach endswitch enddeclare return break
+ continue exit die try catch throw
+ !
+ OtherKeywords = %w!
+ function class extends implements instanceof parent self var const
+ private public protected static abstract final global new echo include
+ require include_once require_once eval print use unset isset empty
+ interface list array clone null true false
+ !
+
+ SpecialConstants = %w! __LINE__ __FILE__ __CLASS__
+ __METHOD__ __FUNCTION__
+ !
+ IdentKinds = WordList.new(:ident).
+ add(ControlKeywords, :reserved).
+ add(OtherKeywords, :pre_type).
+ add(SpecialConstants, :pre_constant)
+ end
+ module RE
+ def self.build_alternatives(array)
+ Regexp.new(array.map { |s| Regexp.escape(s) }.join('|') , Regexp::IGNORECASE)
+ end
+
+ PHPStart = /
+ <script language="php"> |
+ <script language='php'> |
+ <\?php |
+ <\?(?!xml) |
+ <%
+ /xi
+
+ PHPEnd = %r!
+ </script> |
+ \?> |
+ %>
+ !xi
+
+ IChar = /[a-z0-9_\x80-\xFF]/i
+ IStart = /[a-z_\x80-\xFF]/i
+ Identifier = /#{IStart}#{IChar}*/
+ Variable = /\$#{Identifier}/
+
+ Typecasts = build_alternatives %w!
+ float double real int integer bool boolean string array object null
+ !.map{|s| "(#{s})"}
+ OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
+ OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
+ OneLineComment = OneLineComment1 | OneLineComment2
+
+ HereDoc = /<<</ + Identifier
+
+ binops = %w!
+ + - * / << >> & | ^ . %
+ !
+
+ ComparisionOperator = build_alternatives %w$
+ === !== == != <= >=
+ $
+ IncDecOperator = build_alternatives %w! ++ -- !
+
+ BinaryOperator = build_alternatives binops
+ AssignOperator = build_alternatives binops.map {|s| "${s}=" }
+ LogicalOperator = build_alternatives %w! and or xor not !
+ ObjectOperator = build_alternatives %w! -> :: !
+ OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$
+
+ Operator = ComparisionOperator | IncDecOperator | LogicalOperator |
+ ObjectOperator | AssignOperator | BinaryOperator | OtherOperator
+
+
+ S = /\s+/
+
+ Integer = /-?0x[0-9a-fA-F]/ | /-?\d+/
+ Float = /-?(?:\d+\.\d*|\d*\.\d+)(?:e[+-]\d+)?/
+
+ end
+
+    # Tokenizes the input and appends [text, kind] pairs to +tokens+.
+    #
+    # A stack of states drives the scan: :php at the bottom, with :html,
+    # :mlcomment, :sqstring and :dqstring pushed on top as needed. A nested
+    # :php state is pushed for "{" so that the matching "}" pops it again.
+    # Text in the :html state is handed to the nested HTML scanner.
+    #
+    # tokens::  the token list to append to
+    # options:: not used by this scanner
+    #
+    # Returns +tokens+. Raises via raise_inspect on an unknown state or when
+    # no rule produced a token.
+    def scan_tokens tokens, options
+
+      states = [:php]
+      # Remembered for a future heredoc state; not used yet.
+      heredocdelim = nil
+
+      until eos?
+
+        match = nil
+        kind = nil
+
+        case states.last
+        when :html
+          if scan RE::PHPStart
+            kind = :delimiter
+            states.pop
+          else
+            # Everything up to the next PHP opening tag (or the end of the
+            # input) belongs to the HTML scanner.
+            match = scan_until(/(?=#{RE::PHPStart})/o) || scan_until(/\z/)
+            @html_scanner.tokenize match if not match.empty?
+            # The HTML scanner produced the tokens; do not add an empty one.
+            next
+          end
+
+        when :php
+          if scan RE::PHPEnd
+            kind = :delimiter
+            states.push :html
+
+          elsif scan RE::S
+            kind = :space
+
+          elsif scan(/\/\*/)
+            kind = :comment
+            states.push :mlcomment
+
+          elsif scan RE::OneLineComment
+            kind = :comment
+
+          elsif match = scan(RE::Identifier)
+            kind = Words::IdentKinds[match]
+            if kind == :ident && check(/:(?!:)/)
+              # An identifier directly followed by a single colon.
+              kind = :label
+            elsif kind == :ident and match =~ /^[A-Z]/
+              kind = :constant
+            end
+
+          # Float has to be tried before Integer: Integer matches the digits
+          # in front of the decimal point, so "1.5" would otherwise be split
+          # into an integer, an operator and another integer.
+          elsif scan RE::Float
+            kind = :float
+
+          elsif scan RE::Integer
+            kind = :integer
+
+          elsif scan(/'/)
+            kind = :delimiter
+            states.push :sqstring
+
+          elsif scan(/"/)
+            kind = :delimiter
+            states.push :dqstring
+
+          elsif match = scan(RE::HereDoc)
+            heredocdelim = match[RE::Identifier]
+            kind = :delimiter
+            # TODO: states.push :heredocstring
+
+          elsif scan RE::Variable
+            kind = :local_variable
+
+          elsif scan(/\{/)
+            kind = :operator
+            states.push :php
+
+          elsif scan(/\}/)
+            if states.length == 1
+              # No opening brace left to close.
+              kind = :error
+            else
+              kind = :operator
+              states.pop
+            end
+
+          elsif scan RE::Operator
+            kind = :operator
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        when :mlcomment
+          if scan(/(?:[^\n\r\f*]|\*(?!\/))+/)
+            kind = :comment
+
+          elsif scan(/\*\//)
+            kind = :comment
+            states.pop
+
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          end
+
+        when :sqstring
+          if scan(/[^\r\n\f'\\]+/)
+            kind = :string
+          elsif match = scan(/\\\\|\\'/)
+            kind = :char
+          elsif scan(/\\/)
+            # A backslash that does not start an escape is plain content.
+            kind = :string
+          elsif scan(/[\r\n\f ]+/)
+            kind = :space
+          elsif scan(/'/)
+            kind = :delimiter
+            states.pop
+          end
+
+        when :dqstring
+          # TODO: $foo[bar] kind of stuff
+          if scan(/[^\r\n\f"${\\]+/)
+            kind = :string
+          elsif scan(/\\x[0-9a-fA-F]{1,2}/)
+            # Hex escapes take one or two hex digits, including 0-9.
+            kind = :char
+          elsif scan(/\\\d{3}/)
+            kind = :char
+          elsif scan(/\\[$"\\abcfnrtyv]/)
+            # "\$" is an escape too; without it the following text would be
+            # highlighted as an interpolated variable.
+            kind = :char
+          elsif scan(/\\/)
+            kind = :string
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          elsif match = scan(/#{RE::Variable}/o)
+            kind = :local_variable
+            if check(/\[#{RE::Identifier}\]/o)
+              match << scan(/\[#{RE::Identifier}\]/o)
+            elsif check(/\[/)
+              # Unsupported or malformed index expression.
+              match << scan(/\[#{RE::Identifier}?/o)
+              kind = :error
+            elsif check(/->#{RE::Identifier}/o)
+              match << scan(/->#{RE::Identifier}/o)
+            end
+          elsif scan(/\{/)
+            if check(/\$/)
+              # "{$" starts an embedded expression, closed by "}".
+              kind = :operator
+              states.push :php
+            else
+              kind = :string
+            end
+            match = '{'
+          elsif scan(/\$\{#{RE::Identifier}\}/o)
+            kind = :local_variable
+          elsif scan(/\$/)
+            kind = :string
+          elsif scan(/"/)
+            kind = :delimiter
+            states.pop
+          end
+        else
+          raise_inspect 'Unknown state!', tokens, states
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens, states
+        end
+        raise_inspect 'Empty token', tokens, states unless match
+
+        tokens << [match, kind]
+
+      end
+      tokens
+
+    end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
new file mode 100644
index 0000000..9ba0b4d
--- /dev/null
+++ b/lib/coderay/scanners/sql.rb
@@ -0,0 +1,159 @@
+module CodeRay module Scanners
+
+ # by Josh Goebel
+ class SQL < Scanner
+
+ register_for :sql
+
+ RESERVED_WORDS = %w(
+ create table index trigger drop primary key set select
+ insert update delete replace into
+ on from values before and or if exists case when
+ then else as group order by avg where
+ join inner outer union engine not
+ like end using collate show columns begin
+ )
+
+ PREDEFINED_TYPES = %w(
+ char varchar enum binary text tinytext mediumtext
+ longtext blob tinyblob mediumblob longblob timestamp
+ date time datetime year double decimal float int
+ integer tinyint mediumint bigint smallint unsigned bit
+ bool boolean hex bin oct
+ )
+
+ PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )
+
+ DIRECTIVES = %w( auto_increment unique default charset )
+
+ PREDEFINED_CONSTANTS = %w( null true false )
+
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_TYPES, :pre_type).
+ add(PREDEFINED_CONSTANTS, :pre_constant).
+ add(PREDEFINED_FUNCTIONS, :predefined).
+ add(DIRECTIVES, :directive)
+
+ ESCAPE = / [rbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+ STRING_PREFIXES = /[xnb]|_\w+/i
+
+ def scan_tokens tokens, options
+
+ state = :initial
+ string_type = nil
+ string_content = ''
+
+ until eos?
+
+ kind = nil
+ match = nil
+
+ if state == :initial
+
+ if scan(/ \s+ | \\\n /x)
+ kind = :space
+
+ elsif scan(/^(?:--\s?|#).*/)
+ kind = :comment
+
+ elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
+ kind = :comment
+
+ elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
+ kind = :operator
+
+ elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
+ prefix = self[1]
+ string_type = self[2]
+ tokens << [:open, :string]
+ tokens << [prefix, :modifier] if prefix
+ match = string_type
+ state = :string
+ kind = :delimiter
+
+ elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
+ kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]
+
+ elsif scan(/0[xX][0-9A-Fa-f]+/)
+ kind = :hex
+
+ elsif scan(/0[0-7]+(?![89.eEfF])/)
+ kind = :oct
+
+ elsif scan(/(?>\d+)(?![.eEfF])/)
+ kind = :integer
+
+ elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+ kind = :float
+
+ else
+ getch
+ kind = :error
+
+ end
+
+ elsif state == :string
+ if match = scan(/[^\\"'`]+/)
+ string_content << match
+ next
+ elsif match = scan(/["'`]/)
+ if string_type == match
+ if peek(1) == string_type # doubling means escape
+ string_content << string_type << getch
+ next
+ end
+ unless string_content.empty?
+ tokens << [string_content, :content]
+ string_content = ''
+ end
+ tokens << [matched, :delimiter]
+ tokens << [:close, :string]
+ state = :initial
+ string_type = nil
+ next
+ else
+ string_content << match
+ end
+ next
+ elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ unless string_content.empty?
+ tokens << [string_content, :content]
+ string_content = ''
+ end
+ kind = :char
+ elsif match = scan(/ \\ . /mox)
+ string_content << match
+ next
+ elsif scan(/ \\ | $ /x)
+ unless string_content.empty?
+ tokens << [string_content, :content]
+ string_content = ''
+ end
+ kind = :error
+ state = :initial
+ else
+ raise "else case \" reached; %p not handled." % peek(1), tokens
+ end
+
+ else
+ raise 'else-case reached', tokens
+
+ end
+
+ match ||= matched
+# raise [match, kind], tokens if kind == :error
+
+ tokens << [match, kind]
+
+ end
+# RAILS_DEFAULT_LOGGER.info tokens.inspect
+ tokens
+
+ end
+
+ end
+
+end end
\ No newline at end of file