summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/php.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners/php.rb')
-rw-r--r--lib/coderay/scanners/php.rb284
1 files changed, 284 insertions, 0 deletions
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
new file mode 100644
index 0000000..0912ea5
--- /dev/null
+++ b/lib/coderay/scanners/php.rb
@@ -0,0 +1,284 @@
+class Regexp
+ def |(other)
+ Regexp.union(self, other)
+ end
+ def +(other)
+ /#{self}#{other}/
+ end
+end
+module CodeRay
+module Scanners
+
+ load :html
+
+ # TODO: Complete rewrite. This scanner is buggy.
+ class PHP < Scanner
+
+ register_for :php
+ file_extension 'php'
+
+ def setup
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+ end
+
+ def reset_instance
+ super
+ @html_scanner.reset
+ end
+
+ module Words
+ ControlKeywords = %w!
+ if else elseif while do for switch case default declare foreach as
+ endif endwhile endfor endforeach endswitch enddeclare return break
+ continue exit die try catch throw
+ !
+ OtherKeywords = %w!
+ function class extends implements instanceof parent self var const
+ private public protected static abstract final global new echo include
+ require include_once require_once eval print use unset isset empty
+ interface list array clone null true false
+ !
+
+ SpecialConstants = %w! __LINE__ __FILE__ __CLASS__
+ __METHOD__ __FUNCTION__
+ !
+ IdentKinds = WordList.new(:ident).
+ add(ControlKeywords, :reserved).
+ add(OtherKeywords, :pre_type).
+ add(SpecialConstants, :pre_constant)
+ end
+ module RE
+ def self.build_alternatives(array)
+ Regexp.new(array.map { |s| Regexp.escape(s) }.join('|') , Regexp::IGNORECASE)
+ end
+
+ PHPStart = /
+ <script language="php"> |
+ <script language='php'> |
+ <\?php |
+ <\?(?!xml) |
+ <%
+ /xi
+
+ PHPEnd = %r!
+ </script> |
+ \?> |
+ %>
+ !xi
+
+ IChar = /[a-z0-9_\x80-\xFF]/i
+ IStart = /[a-z_\x80-\xFF]/i
+ Identifier = /#{IStart}#{IChar}*/
+ Variable = /\$#{Identifier}/
+
+ Typecasts = build_alternatives %w!
+ float double real int integer bool boolean string array object null
+ !.map{|s| "(#{s})"}
+ OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
+ OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
+ OneLineComment = OneLineComment1 | OneLineComment2
+
+ HereDoc = /<<</ + Identifier
+
+ binops = %w!
+ + - * / << >> & | ^ . %
+ !
+
+ ComparisionOperator = build_alternatives %w$
+ === !== == != <= >=
+ $
+ IncDecOperator = build_alternatives %w! ++ -- !
+
+ BinaryOperator = build_alternatives binops
+ AssignOperator = build_alternatives binops.map {|s| "${s}=" }
+ LogicalOperator = build_alternatives %w! and or xor not !
+ ObjectOperator = build_alternatives %w! -> :: !
+ OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$
+
+ Operator = ComparisionOperator | IncDecOperator | LogicalOperator |
+ ObjectOperator | AssignOperator | BinaryOperator | OtherOperator
+
+
+ S = /\s+/
+
+ Integer = /-?0x[0-9a-fA-F]/ | /-?\d+/
+ Float = /-?(?:\d+\.\d*|\d*\.\d+)(?:e[+-]\d+)?/
+
+ end
+
+ def scan_tokens tokens, options
+ states = [:php]
+ heredocdelim = nil
+
+ until eos?
+
+ match = nil
+ kind = nil
+
+ case states.last
+ when :html
+ if scan RE::PHPStart
+ kind = :delimiter
+ states.pop
+ else
+ match = scan_until(/(?=#{RE::PHPStart})/o) || scan_until(/\z/)
+ @html_scanner.tokenize match if not match.empty?
+ kind = :space
+ match = ''
+ end
+
+ when :php
+ if scan RE::PHPEnd
+ kind = :delimiter
+ states.push :html
+
+ elsif scan RE::S
+ kind = :space
+
+ elsif scan(/\/\*/)
+ kind = :comment
+ states.push :mlcomment
+
+ elsif scan RE::OneLineComment
+ kind = :comment
+
+ elsif match = scan(RE::Identifier)
+ kind = Words::IdentKinds[match]
+ if kind == :ident && check(/:(?!:)/) #&& tokens[-2][0] == 'case'
+# match << scan(/:/)
+ kind = :label
+ elsif kind == :ident and match =~ /^[A-Z]/
+ kind = :constant
+ end
+
+ elsif scan RE::Integer
+ kind = :integer
+
+ elsif scan RE::Float
+ kind = :float
+
+ elsif scan(/'/)
+ kind = :delimiter
+ states.push :sqstring
+
+ elsif scan(/"/)
+ kind = :delimiter
+ states.push :dqstring
+
+ elsif match = scan(RE::HereDoc)
+ heredocdelim = match[RE::Identifier]
+ kind = :delimiter
+ # states.push :heredocstring
+
+ elsif scan RE::Variable
+ kind = :local_variable
+
+ elsif scan(/\{/)
+ kind = :operator
+ states.push :php
+
+ elsif scan(/\}/)
+ if states.length == 1
+ kind = :error
+ else
+ kind = :operator
+ states.pop
+ end
+
+ elsif scan RE::Operator
+ kind = :operator
+
+ else
+ getch
+ kind = :error
+
+ end
+
+ when :mlcomment
+ if scan(/(?:[^\n\r\f*]|\*(?!\/))+/)
+ kind = :comment
+
+ elsif scan(/\*\//)
+ kind = :comment
+ states.pop
+
+ elsif scan(/[\r\n\f]+/)
+ kind = :space
+ end
+
+ when :sqstring
+ if scan(/[^\r\n\f'\\]+/)
+ kind = :string
+ elsif match = scan(/\\\\|\\'/)
+ kind = :char
+ elsif scan(/\\/)
+ kind = :string
+ elsif scan(/[\r\n\f ]+/)
+ kind = :space
+ elsif scan(/'/)
+ kind = :delimiter
+ states.pop
+ end
+
+ when :dqstring
+#todo: $foo[bar] kind of stuff
+ if scan(/[^\r\n\f"${\\]+/)
+ kind = :string
+ elsif scan(/\\x[a-fA-F]{2}/)
+ kind = :char
+ elsif scan(/\\\d{3}/)
+ kind = :char
+ elsif scan(/\\["\\abcfnrtyv]/)
+ kind = :char
+ elsif scan(/\\/)
+ kind = :string
+ elsif scan(/[\r\n\f]+/)
+ kind = :space
+ elsif match = scan(/#{RE::Variable}/o)
+ kind = :local_variable
+ if check(/\[#{RE::Identifier}\]/o)
+ match << scan(/\[#{RE::Identifier}\]/o)
+ elsif check(/\[/)
+ match << scan(/\[#{RE::Identifier}?/o)
+ kind = :error
+ elsif check(/->#{RE::Identifier}/o)
+ match << scan(/->#{RE::Identifier}/o)
+ end
+ elsif scan(/\{/)
+ if check(/\$/)
+ kind = :operator
+ states.push :php
+ else
+ kind = :string
+ end
+ match = '{'
+ elsif scan(/\$\{#{RE::Identifier}\}/o)
+ kind = :local_variable
+ elsif scan(/\$/)
+ kind = :string
+ elsif scan(/"/)
+ kind = :delimiter
+ states.pop
+ end
+ else
+ raise_inspect 'Unknown state!', tokens, states
+ end
+
+ match ||= matched
+ if $DEBUG and not kind
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens, states
+ end
+ raise_inspect 'Empty token', tokens, states unless match
+
+ tokens << [match, kind]
+
+ end
+ tokens
+
+ end
+
+ end
+
+end
+end