From 99308a6da363c78e656aa908f164e45d2f55bf36 Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 20 Apr 2009 23:41:03 +0000 Subject: Many improvements to PHP Scanner. * added lots of built-in functions, constants, syntax rule fixes... * automatic HTML/PHP document recognition; experimental! * cYcnus style adjusted; inline_delimiter outside of token group has a style now. * clean-up still not finished --- lib/coderay/scanners/php.rb | 322 +++++++++++++++++++++++++++++--------------- 1 file changed, 214 insertions(+), 108 deletions(-) (limited to 'lib/coderay/scanners/php.rb') diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb index 0912ea5..39e8eb4 100644 --- a/lib/coderay/scanners/php.rb +++ b/lib/coderay/scanners/php.rb @@ -27,55 +27,152 @@ module Scanners end module Words - ControlKeywords = %w! - if else elseif while do for switch case default declare foreach as - endif endwhile endfor endforeach endswitch enddeclare return break - continue exit die try catch throw - ! - OtherKeywords = %w! - function class extends implements instanceof parent self var const - private public protected static abstract final global new echo include - require include_once require_once eval print use unset isset empty - interface list array clone null true false - ! - - SpecialConstants = %w! __LINE__ __FILE__ __CLASS__ - __METHOD__ __FUNCTION__ - ! - IdentKinds = WordList.new(:ident). - add(ControlKeywords, :reserved). - add(OtherKeywords, :pre_type). - add(SpecialConstants, :pre_constant) + + # according to http://www.php.net/manual/en/reserved.keywords.php + KEYWORDS = %w[ + abstract and array as break case catch class clone + const continue declare default do else elseif + enddeclare endfor endforeach endif endswitch endwhile + extends final for foreach function global goto if implements + interface instanceof namespace new or + private protected public static switch throw try use var + while xor + cfunction old_function + null true false + ] + + LANGUAGE_CONSTRUCTS = %w[ + die echo empty exit eval include include_once isset list + require require_once return print unset + ] + + TYPES = %w[ int float ] + + # according to http://php.net/quickref.php on 2009-04-21; + # all functions with _ excluded (module functions) + BUILTIN_FUNCTIONS = %w[ + abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2 + atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec + bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite + calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog + compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex + decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each + ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract + fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner + fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv + fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate + getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid + getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext + gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate + gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell + gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate + implode include intval ip2long iptcembed iptcparse isset + jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp + juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log + log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort + natsort next ngettext nl2br nthmac octdec opendir openlog + ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow + prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile + readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir + serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget + snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat + strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen + strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn + strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam + textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack + unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap + ] + %w[ + assert_options base_convert base64_decode base64_encode + chunk_split class_exists class_implements class_parents + count_chars debug_backtrace debug_print_backtrace debug_zval_dump + error_get_last error_log error_reporting extension_loaded + file_exists file_get_contents file_put_contents load_file + func_get_arg func_get_args func_num_args function_exists + get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars + get_current_user get_declared_classes get_declared_interfaces get_defined_constants + get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table + get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime + get_meta_tags get_object_vars get_parent_class get_required_filesget_resource_type + gc_collect_cycles gc_disable gc_enable gc_enabled + halt_compiler headers_list headers_sent highlight_file highlight_string + html_entity_decode htmlspecialchars_decode + in_array include_once inclued_get_data + locale_get_default locale_set_default + number_format override_function parse_str parse_url + php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name + php_strip_whitespace php_uname + preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace + preg_replace_callback preg_split print_r + require_once register_shutdown_function register_tick_function + set_error_handler set_exception_handler set_file_buffer set_include_path + set_magic_quotes_runtime set_time_limit shell_exec + str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count + strip_tags substr_compare substr_count substr_replace + time_nanosleep time_sleep_until + token_get_all token_name trigger_error + unregister_tick_function use_soap_error_handler user_error + utf8_decode utf8_encode var_dump var_export + version_compare + zend_logo_guid zend_thread_id zend_version + ] + %w[ + array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc + array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip + array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey + array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad + array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift + array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect + array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk + array_walk_recursive + ] + %w[ + is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite + is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable + is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file + is_writable is_writeable + ] + + # TODO: more built-in PHP functions? + # TODO: more predefined constants? + + SPECIAL_CONSTANTS = %w[ + __LINE__ __DIR__ __FILE__ __LINE__ + __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__ + ] + + IdentKinds = CaseIgnoringWordList.new(:ident, true). + add(KEYWORDS, :reserved). + add(TYPES, :pre_type). + add(LANGUAGE_CONSTRUCTS, :predefined). + add(BUILTIN_FUNCTIONS, :predefined). + add(SPECIAL_CONSTANTS, :pre_constant) end + module RE def self.build_alternatives(array) Regexp.new(array.map { |s| Regexp.escape(s) }.join('|') , Regexp::IGNORECASE) end - - PHPStart = / - | - \?> | - %> + \?> !xi - + IChar = /[a-z0-9_\x80-\xFF]/i IStart = /[a-z_\x80-\xFF]/i Identifier = /#{IStart}#{IChar}*/ Variable = /\$#{Identifier}/ - + Typecasts = build_alternatives %w! float double real int integer bool boolean string array object null !.map{|s| "(#{s})"} - OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)! - OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)! + OneLineComment1 = %r!//.*?(?=#{PHP_END}|$)! + OneLineComment2 = %r!#.*?(?=#{PHP_END}|$)! OneLineComment = OneLineComment1 | OneLineComment2 HereDoc = /<<