diff options
author | Kornelius Kalnbach <murphy@rubychan.de> | 2013-03-10 21:44:53 +0100 |
---|---|---|
committer | Kornelius Kalnbach <murphy@rubychan.de> | 2013-03-10 21:44:53 +0100 |
commit | af0d7d807c87097346584d06b7be59b5c5e656e2 (patch) | |
tree | 64b3eec10c23c920699115369c1ad01e74769537 /lib/coderay/scanners | |
parent | 46fc5486cde5cf1816cba945eb74eff5a8228aa7 (diff) | |
parent | 3d7f34571a0b2e58ee90498bc54f160bda2bed45 (diff) | |
download | coderay-af0d7d807c87097346584d06b7be59b5c5e656e2.tar.gz |
Merge branch 'master' into multiline-inline-diff
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/html.rb | 2 | ||||
-rw-r--r-- | lib/coderay/scanners/php.rb | 5 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 33 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 36 |
4 files changed, 42 insertions, 34 deletions
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 49c346d..3ba3b79 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -101,7 +101,7 @@ module Scanners when :initial if match = scan(/<!--(?:.*?-->|.*)/m) encoder.text_token match, :comment - elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m) + elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m) encoder.text_token match, :doctype elsif match = scan(/<\?xml(?:.*?\?>|.*)/m) encoder.text_token match, :preprocessor diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb index 8acfff5..6c68834 100644 --- a/lib/coderay/scanners/php.rb +++ b/lib/coderay/scanners/php.rb @@ -1,4 +1,4 @@ -# encoding: ASCII-8BIT +# encoding: utf-8 module CodeRay module Scanners @@ -11,7 +11,6 @@ module Scanners register_for :php file_extension 'php' - encoding 'BINARY' KINDS_NOT_LOC = HTML::KINDS_NOT_LOC @@ -211,7 +210,7 @@ module Scanners HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i - IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i + IDENTIFIER = 'ä'[/[[:alpha:]]/] == 'ä' ? Regexp.new('[[:alpha:]_[^\0-\177]][[:alnum:]_[^\0-\177]]*') : Regexp.new('[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*', true) VARIABLE = /\$#{IDENTIFIER}/ OPERATOR = / diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 2be98a6..c5cf1e2 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -94,18 +94,27 @@ module Scanners if !method_call_expected && match = scan(unicode ? /#{patterns::METHOD_NAME}/uo : /#{patterns::METHOD_NAME}/o) - value_expected = false + kind = patterns::IDENT_KIND[match] - if kind == :ident - if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/)) - kind = :constant + if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/) + value_expected = true + encoder.text_token match, :key + encoder.text_token ':', :operator + else + value_expected = false + if kind == :ident + if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/)) + kind = :constant + end + elsif kind == :keyword + state = patterns::KEYWORD_NEW_STATE[match] + if patterns::KEYWORDS_EXPECTING_VALUE[match] + value_expected = match == 'when' ? :colon_expected : true + end end - elsif kind == :keyword - state = patterns::KEYWORD_NEW_STATE[match] - value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match] + value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o) + encoder.text_token match, kind end - value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o) - encoder.text_token match, kind elsif method_call_expected && match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo : @@ -119,9 +128,9 @@ module Scanners value_expected = check(/#{patterns::VALUE_FOLLOWS}/o) # OPERATORS # - elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x) + elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x) method_call_expected = self[1] - value_expected = !method_call_expected && self[2] + value_expected = !method_call_expected && !!self[2] if inline_block_stack case match when '{' @@ -213,7 +222,7 @@ module Scanners encoder.text_token match, :integer elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x) - value_expected = true + value_expected = match == '?' ? :colon_expected : true encoder.text_token match, :operator elsif match = scan(/`/) diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index a52198e..ed071d2 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -1,9 +1,9 @@ # encoding: utf-8 module CodeRay module Scanners - + module Ruby::Patterns # :nodoc: all - + KEYWORDS = %w[ and def end in or unless begin defined? ensure module redo super until @@ -12,7 +12,7 @@ module Scanners while alias class elsif if not return undef yield ] - + # See http://murfy.de/ruby-constants. PREDEFINED_CONSTANTS = %w[ nil true false self @@ -24,19 +24,19 @@ module Scanners RUBY_PLATFORM RUBY_RELEASE_DATE RUBY_REVISION RUBY_VERSION __FILE__ __LINE__ __ENCODING__ ] - + IDENT_KIND = WordList.new(:ident). add(KEYWORDS, :keyword). add(PREDEFINED_CONSTANTS, :predefined_constant) - + KEYWORD_NEW_STATE = WordList.new(:initial). add(%w[ def ], :def_expected). add(%w[ undef ], :undef_expected). add(%w[ alias ], :alias_expected). add(%w[ class module ], :module_expected) - - IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/ - + + IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? Regexp.new('[[:alpha:]_[^\0-\177]][[:alnum:]_[^\0-\177]]*') : /[^\W\d]\w*/ + METHOD_NAME = / #{IDENT} [?!]? /ox METHOD_NAME_OPERATOR = / \*\*? # multiplication and power @@ -57,25 +57,25 @@ module Scanners GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox - + QUOTE_TO_TYPE = { '`' => :shell, '/'=> :regexp, } QUOTE_TO_TYPE.default = :string - + REGEXP_MODIFIERS = /[mousenix]*/ - + DECIMAL = /\d+(?:_\d+)*/ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ BINARY = /0b[01]+(?:_[01]+)*/ - + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox - + SYMBOL = / : (?: @@ -85,7 +85,7 @@ module Scanners ) /ox METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox - + SIMPLE_ESCAPE = / [abefnrstv] | [0-7]{1,3} @@ -110,7 +110,7 @@ module Scanners | \\ #{ESCAPE} ) /mox - + # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / @@ -122,13 +122,13 @@ module Scanners ( [^\n]*? ) \3 # $4 = delim ) /mx - + RUBYDOC = / =begin (?!\S) .*? (?: \Z | ^=end (?!\S) [^\n]* ) /mx - + DATA = / __END__$ .*? @@ -136,7 +136,7 @@ module Scanners /mx RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo - + # Checks for a valid value to follow. This enables # value_expected in method calls without parentheses. VALUE_FOLLOWS = / |