summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners/php.rb
diff options
context:
space:
mode:
author murphy <murphy@rubychan.de> 2009-12-28 07:27:12 +0000
committer murphy <murphy@rubychan.de> 2009-12-28 07:27:12 +0000
commit 432aeb74d5a49e6f0efd1063113cef099c93aef6 (patch)
tree f300ae05c2418721ea188ca1acc763cff06e71b7 /lib/coderay/scanners/php.rb
parent 1982b2bf7e7bfa333838cf2f3e7613c00d2a67c3 (diff)
download coderay-432aeb74d5a49e6f0efd1063113cef099c93aef6.tar.gz
Copying changes and fixes for 0.9.0rc3 from terminal-encoder branch over to trunk.
Diffstat (limited to 'lib/coderay/scanners/php.rb')
-rw-r--r-- lib/coderay/scanners/php.rb 197
1 files changed, 143 insertions, 54 deletions
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index bfbc642..af9e16f 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -38,7 +38,7 @@ module Scanners
require require_once return print unset
]
- CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
+ CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
# according to http://php.net/quickref.php on 2009-04-21;
# all functions with _ excluded (module functions) and selected additional functions
@@ -117,6 +117,18 @@ module Scanners
utf8_decode utf8_encode var_dump var_export
version_compare
zend_logo_guid zend_thread_id zend_version
+ create_function call_user_func_array
+ posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
+ posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
+ posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
+ posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
+ posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
+ posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
+ posix_setuid posix_strerror posix_times posix_ttyname posix_uname
+ pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
+ pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
+ pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
+ pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
]
# TODO: more built-in PHP functions?
@@ -158,6 +170,12 @@ module Scanners
LOG_NDELAY LOG_NOWAIT LOG_PERROR
]
+ PREDEFINED = %w[
+ $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
+ $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
+ $argc $argv
+ ]
+
IDENT_KIND = CaseIgnoringWordList.new(:ident, true).
add(KEYWORDS, :reserved).
add(TYPES, :pre_type).
@@ -166,6 +184,9 @@ module Scanners
add(CLASSES, :pre_constant).
add(EXCEPTIONS, :exception).
add(CONSTANTS, :pre_constant)
+
+ VARIABLE_KIND = WordList.new(:local_variable).
+ add(PREDEFINED, :predefined)
end
module RE
@@ -194,7 +215,8 @@ module Scanners
\+\+ | -- | # increment, decrement
[,;?:()\[\]{}] | # simple delimiters
[-+*\/%&|^]=? | # ordinary math, binary logic, assignment shortcuts
- [~@$] | # whatever
+ [~$] | # whatever
+ =& | # reference assignment
[=!]=?=? | <> | # comparison and assignment
<<=? | >>=? | [<>]=? # comparison and shift
/x
@@ -203,17 +225,23 @@ module Scanners
def scan_tokens tokens, options
- states = [:initial]
- if match?(RE::PHP_START) || # starts with <?
+ if check(RE::PHP_START) || # starts with <?
(match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
- exist?(RE::HTML_INDICATOR)
- # is PHP inside HTML, so start with HTML
+ exist?(RE::HTML_INDICATOR) ||
+ check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
+ # is HTML with embedded PHP, so start with HTML
+ states = [:initial]
else
- states << :php
+ # is just PHP, so start with PHP surrounded by HTML
+ states = [:initial, :php]
end
- # heredocdelim = nil
+ label_expected = true
+ case_expected = false
+
+ heredoc_delimiter = nil
delimiter = nil
+ modifier = nil
until eos?
@@ -225,6 +253,7 @@ module Scanners
when :initial # HTML
if scan RE::PHP_START
kind = :inline_delimiter
+ label_expected = true
states << :php
else
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
@@ -233,70 +262,75 @@ module Scanners
end
when :php
- if scan RE::PHP_END
- kind = :inline_delimiter
- states = [:initial]
-
- elsif scan(/\s+/)
- kind = :space
-
- elsif scan(/ \/\* (?: .*? \*\/ | .* ) /mx)
- kind = :comment
+ if match = scan(/\s+/)
+ tokens << [match, :space]
+ next
- elsif scan(%r!(?://|#).*?(?=#{RE::PHP_END}|$)!o)
+ elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
kind = :comment
elsif match = scan(RE::IDENTIFIER)
kind = Words::IDENT_KIND[match]
- if kind == :ident && check(/:(?!:)/) #&& tokens[-2][0] == 'case'
- # FIXME: don't match a?b:c
+ if kind == :ident && label_expected && check(/:(?!:)/)
kind = :label
- elsif kind == :ident && match =~ /^[A-Z]/
- kind = :constant
- elsif kind == :reserved && match == 'class'
- states << :class_expected
- elsif kind == :reserved && match == 'function'
- states << :function_expected
+ label_expected = true
+ else
+ label_expected = false
+ if kind == :ident && match =~ /^[A-Z]/
+ kind = :constant
+ elsif kind == :reserved
+ case match
+ when 'class'
+ states << :class_expected
+ when 'function'
+ states << :function_expected
+ when 'case', 'default'
+ case_expected = true
+ end
+ elsif match == 'b' && check(/['"]/) # binary string literal
+ modifier = match
+ next
+ end
end
elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
+ label_expected = false
kind = :float
elsif scan(/0x[0-9a-fA-F]+/)
+ label_expected = false
kind = :hex
elsif scan(/\d+/)
+ label_expected = false
kind = :integer
elsif scan(/'/)
tokens << [:open, :string]
+ if modifier
+ tokens << [modifier, :modifier]
+ modifier = nil
+ end
kind = :delimiter
states.push :sqstring
elsif match = scan(/["`]/)
tokens << [:open, :string]
+ if modifier
+ tokens << [modifier, :modifier]
+ modifier = nil
+ end
delimiter = match
kind = :delimiter
states.push :dqstring
- # TODO: Heredocs
- # See http://de2.php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc
- elsif match = scan(/<<<(#{RE::IDENTIFIER})/o)
- tokens << [:open, :string]
- heredocdelim = Regexp.escape self[1]
- tokens << [match, :delimiter]
- next if eos?
- tokens << [scan_until(/\n(?=#{heredocdelim};?$)|\z/), :content]
- next if eos?
- tokens << [scan(/#{heredocdelim}/), :delimiter]
- tokens << [:close, :string]
- next
-
- elsif scan RE::VARIABLE
- kind = :local_variable
+ elsif match = scan(RE::VARIABLE)
+ label_expected = false
+ kind = Words::VARIABLE_KIND[match]
elsif scan(/\{/)
kind = :operator
+ label_expected = true
states.push :php
elsif scan(/\}/)
@@ -312,10 +346,32 @@ module Scanners
next
else
kind = :operator
+ label_expected = true
end
end
- elsif scan(/#{RE::OPERATOR}/o)
+ elsif scan(/@/)
+ label_expected = false
+ kind = :exception
+
+ elsif scan RE::PHP_END
+ kind = :inline_delimiter
+ states = [:initial]
+
+ elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
+ tokens << [:open, :string]
+ warn 'heredoc in heredoc?' if heredoc_delimiter
+ heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
+ kind = :delimiter
+ states.push self[3] ? :sqstring : :dqstring
+ heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
+
+ elsif match = scan(/#{RE::OPERATOR}/o)
+ label_expected = match == ';'
+ if case_expected
+ label_expected = true if match == ':'
+ case_expected = false
+ end
kind = :operator
else
@@ -325,15 +381,27 @@ module Scanners
end
when :sqstring
- if scan(/[^'\\]+/)
+ if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
kind = :content
- elsif scan(/'/)
+ elsif !heredoc_delimiter && scan(/'/)
tokens << [matched, :delimiter]
tokens << [:close, :string]
delimiter = nil
+ label_expected = false
states.pop
next
- elsif scan(/\\[\\'\n]/)
+ elsif heredoc_delimiter && match = scan(/\n/)
+ kind = :content
+ if scan heredoc_delimiter
+ tokens << ["\n", :content]
+ tokens << [matched, :delimiter]
+ tokens << [:close, :string]
+ heredoc_delimiter = nil
+ label_expected = false
+ states.pop
+ next
+ end
+ elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
kind = :char
elsif scan(/\\./m)
kind = :content
@@ -342,17 +410,29 @@ module Scanners
end
when :dqstring
- if scan(delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/)
+ if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
kind = :content
- elsif scan(delimiter == '"' ? /"/ : /`/)
+ elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
tokens << [matched, :delimiter]
tokens << [:close, :string]
delimiter = nil
+ label_expected = false
states.pop
next
- elsif scan(/\\(?:x[0-9a-fA-F]{2}|\d{3})/)
+ elsif heredoc_delimiter && match = scan(/\n/)
+ kind = :content
+ if scan heredoc_delimiter
+ tokens << ["\n", :content]
+ tokens << [matched, :delimiter]
+ tokens << [:close, :string]
+ heredoc_delimiter = nil
+ label_expected = false
+ states.pop
+ next
+ end
+ elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
kind = :char
- elsif scan(delimiter == '"' ? /\\["\\\nfnrtv]/ : /\\[`\\\nfnrtv]/)
+ elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
kind = :char
elsif scan(/\\./m)
kind = :content
@@ -360,15 +440,24 @@ module Scanners
kind = :error
elsif match = scan(/#{RE::VARIABLE}/o)
kind = :local_variable
- # $foo[bar] and $foo->bar kind of stuff
- # TODO: highlight tokens separately!
if check(/\[#{RE::IDENTIFIER}\]/o)
- match << scan(/\[#{RE::IDENTIFIER}\]/o)
+ tokens << [:open, :inline]
+ tokens << [match, :local_variable]
+ tokens << [scan(/\[/), :operator]
+ tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
+ tokens << [scan(/\]/), :operator]
+ tokens << [:close, :inline]
+ next
elsif check(/\[/)
- match << scan(/\[#{RE::IDENTIFIER}?/o)
+ match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
kind = :error
elsif check(/->#{RE::IDENTIFIER}/o)
- match << scan(/->#{RE::IDENTIFIER}/o)
+ tokens << [:open, :inline]
+ tokens << [match, :local_variable]
+ tokens << [scan(/->/), :operator]
+ tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
+ tokens << [:close, :inline]
+ next
elsif check(/->/)
match << scan(/->/)
kind = :error