summary refs log tree commit diff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
author Kornelius Kalnbach <murphy@rubychan.de> 2013-03-10 21:44:53 +0100
committer Kornelius Kalnbach <murphy@rubychan.de> 2013-03-10 21:44:53 +0100
commit af0d7d807c87097346584d06b7be59b5c5e656e2 (patch)
tree 64b3eec10c23c920699115369c1ad01e74769537 /lib/coderay/scanners
parent 46fc5486cde5cf1816cba945eb74eff5a8228aa7 (diff)
parent 3d7f34571a0b2e58ee90498bc54f160bda2bed45 (diff)
download coderay-af0d7d807c87097346584d06b7be59b5c5e656e2.tar.gz
Merge branch 'master' into multiline-inline-diff
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- lib/coderay/scanners/html.rb 2
-rw-r--r-- lib/coderay/scanners/php.rb 5
-rw-r--r-- lib/coderay/scanners/ruby.rb 33
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb 36
4 files changed, 42 insertions, 34 deletions
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 49c346d..3ba3b79 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -101,7 +101,7 @@ module Scanners
when :initial
if match = scan(/<!--(?:.*?-->|.*)/m)
encoder.text_token match, :comment
- elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
+ elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
encoder.text_token match, :doctype
elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
encoder.text_token match, :preprocessor
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index 8acfff5..6c68834 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -1,4 +1,4 @@
-# encoding: ASCII-8BIT
+# encoding: utf-8
module CodeRay
module Scanners
@@ -11,7 +11,6 @@ module Scanners
register_for :php
file_extension 'php'
- encoding 'BINARY'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
@@ -211,7 +210,7 @@ module Scanners
HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i
- IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i
+ IDENTIFIER = 'ä'[/[[:alpha:]]/] == 'ä' ? Regexp.new('[[:alpha:]_[^\0-\177]][[:alnum:]_[^\0-\177]]*') : Regexp.new('[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*', true)
VARIABLE = /\$#{IDENTIFIER}/
OPERATOR = /
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 2be98a6..c5cf1e2 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -94,18 +94,27 @@ module Scanners
if !method_call_expected &&
match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
/#{patterns::METHOD_NAME}/o)
- value_expected = false
+
kind = patterns::IDENT_KIND[match]
- if kind == :ident
- if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
- kind = :constant
+ if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/)
+ value_expected = true
+ encoder.text_token match, :key
+ encoder.text_token ':', :operator
+ else
+ value_expected = false
+ if kind == :ident
+ if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
+ kind = :constant
+ end
+ elsif kind == :keyword
+ state = patterns::KEYWORD_NEW_STATE[match]
+ if patterns::KEYWORDS_EXPECTING_VALUE[match]
+ value_expected = match == 'when' ? :colon_expected : true
+ end
end
- elsif kind == :keyword
- state = patterns::KEYWORD_NEW_STATE[match]
- value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
+ value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
+ encoder.text_token match, kind
end
- value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
- encoder.text_token match, kind
elsif method_call_expected &&
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
@@ -119,9 +128,9 @@ module Scanners
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
# OPERATORS #
- elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
+ elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
method_call_expected = self[1]
- value_expected = !method_call_expected && self[2]
+ value_expected = !method_call_expected && !!self[2]
if inline_block_stack
case match
when '{'
@@ -213,7 +222,7 @@ module Scanners
encoder.text_token match, :integer
elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
- value_expected = true
+ value_expected = match == '?' ? :colon_expected : true
encoder.text_token match, :operator
elsif match = scan(/`/)
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index a52198e..ed071d2 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -1,9 +1,9 @@
# encoding: utf-8
module CodeRay
module Scanners
-
+
module Ruby::Patterns # :nodoc: all
-
+
KEYWORDS = %w[
and def end in or unless begin
defined? ensure module redo super until
@@ -12,7 +12,7 @@ module Scanners
while alias class elsif if not return
undef yield
]
-
+
# See http://murfy.de/ruby-constants.
PREDEFINED_CONSTANTS = %w[
nil true false self
@@ -24,19 +24,19 @@ module Scanners
RUBY_PLATFORM RUBY_RELEASE_DATE RUBY_REVISION RUBY_VERSION
__FILE__ __LINE__ __ENCODING__
]
-
+
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(PREDEFINED_CONSTANTS, :predefined_constant)
-
+
KEYWORD_NEW_STATE = WordList.new(:initial).
add(%w[ def ], :def_expected).
add(%w[ undef ], :undef_expected).
add(%w[ alias ], :alias_expected).
add(%w[ class module ], :module_expected)
-
- IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/
-
+
+ IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? Regexp.new('[[:alpha:]_[^\0-\177]][[:alnum:]_[^\0-\177]]*') : /[^\W\d]\w*/
+
METHOD_NAME = / #{IDENT} [?!]? /ox
METHOD_NAME_OPERATOR = /
\*\*? # multiplication and power
@@ -57,25 +57,25 @@ module Scanners
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
-
+
QUOTE_TO_TYPE = {
'`' => :shell,
'/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string
-
+
REGEXP_MODIFIERS = /[mousenix]*/
-
+
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0b[01]+(?:_[01]+)*/
-
+
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
-
+
SYMBOL = /
:
(?:
@@ -85,7 +85,7 @@ module Scanners
)
/ox
METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
-
+
SIMPLE_ESCAPE = /
[abefnrstv]
| [0-7]{1,3}
@@ -110,7 +110,7 @@ module Scanners
| \\ #{ESCAPE}
)
/mox
-
+
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
@@ -122,13 +122,13 @@ module Scanners
( [^\n]*? ) \3 # $4 = delim
)
/mx
-
+
RUBYDOC = /
=begin (?!\S)
.*?
(?: \Z | ^=end (?!\S) [^\n]* )
/mx
-
+
DATA = /
__END__$
.*?
@@ -136,7 +136,7 @@ module Scanners
/mx
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
-
+
# Checks for a valid value to follow. This enables
# value_expected in method calls without parentheses.
VALUE_FOLLOWS = /