summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- lib/coderay/scanners/_map.rb | 4
-rw-r--r-- lib/coderay/scanners/html.rb | 57
-rw-r--r-- lib/coderay/scanners/nitro_html.rb | 123
-rw-r--r-- lib/coderay/scanners/rhtml.rb | 63
-rw-r--r-- lib/coderay/scanners/ruby.rb | 35
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb | 2
6 files changed, 248 insertions, 36 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index fc53d91..f6e4452 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -6,7 +6,9 @@ module CodeRay
:pascal => :delphi,
:irb => :ruby,
:xml => :html,
- :xhtml => :html
+ :xhtml => :nitro_html
+
+ default :plain
end
end
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 62da13b..a1efa9e 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -1,8 +1,8 @@
-#require 'coderay/common_patterns'
-
module CodeRay module Scanners
# HTML Scanner
+ #
+ # $Id$
class HTML < Scanner
include Streamable
@@ -27,10 +27,21 @@ module CodeRay module Scanners
;
/ox
+ PLAIN_STRING_CONTENT = {
+ "'" => /[^&'>\n]+/,
+ '"' => /[^&">\n]+/,
+ }
+
private
+ def setup
+ @state = :initial
+ @plain_string_content = nil
+ end
+
def scan_tokens tokens, options
-
- state = :initial
+
+ state = @state
+ plain_string_content = @plain_string_content
until eos?
@@ -55,17 +66,13 @@ module CodeRay module Scanners
kind = :comment
elsif scan(/<\/[-\w_.:]*>/m)
kind = :tag
- elsif match = scan(/<[-\w_.:]*/m)
+ elsif match = scan(/<[-\w_.:]*>?/m)
kind = :tag
- if match?(/>/)
- match << getch
- else
- state = :attribute
- end
+ state = :attribute unless match[-1] == ?>
elsif scan(/[^<>&]+/)
kind = :plain
elsif scan(/#{ENTITY}/ox)
- kind = :char
+ kind = :entity
elsif scan(/>/)
kind = :error
else
@@ -79,6 +86,8 @@ module CodeRay module Scanners
elsif scan(/#{ATTR_NAME}/o)
kind = :attribute_name
state = :attribute_equal
+ else
+ getch
end
when :attribute_equal
@@ -98,29 +107,32 @@ module CodeRay module Scanners
if scan(/#{ATTR_VALUE_UNQUOTED}/o)
kind = :attribute_value
state = :attribute
- elsif scan(/"/)
+ elsif match = scan(/["']/)
tokens << [:open, :string]
state = :attribute_value_string
+ plain_string_content = PLAIN_STRING_CONTENT[match]
kind = :delimiter
elsif scan(/#{TAG_END}/o)
kind = :tag
state = :initial
+ else
+ getch
end
when :attribute_value_string
- if scan(/[^"&\n]+/)
+ if scan(plain_string_content)
kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
+ elsif scan(/['"]/)
+ tokens << [matched, :delimiter]
tokens << [:close, :string]
state = :attribute
next
elsif scan(/#{ENTITY}/ox)
- kind = :char
- elsif match(/\n/)
+ kind = :entity
+ elsif match(/[\n>]/)
tokens << [:close, :string]
- state = :attribute
- next
+ kind = :error # token kinds are Symbols; a bare `error` would be a method call, not a kind
+ state = :initial
end
else
@@ -136,10 +148,15 @@ module CodeRay module Scanners
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
-
+
tokens << [match, kind]
end
+ if options[:keep_state]
+ @state = state
+ @plain_string_content = plain_string_content
+ end
+
tokens
end
diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb
new file mode 100644
index 0000000..86d4992
--- /dev/null
+++ b/lib/coderay/scanners/nitro_html.rb
@@ -0,0 +1,123 @@
+module CodeRay module Scanners
+
+  load :html
+  load :ruby
+
+  # Nitro HTML Scanner
+  #
+  # $Id$
+  class NitroHTML < Scanner
+
+    include Streamable
+    register_for :nitro_html
+
+    NITRO_RUBY_BLOCK = /
+      <\?r
+      (?>
+        [^\?]*
+        (?> \?(?!>) [^\?]* )*
+      )
+      (?: \?> )?
+    |
+      <ruby>
+      (?>
+        [^<]*
+        (?> <(?!\/ruby>) [^<]* )*
+      )
+      (?: <\/ruby> )?
+    |
+      <%
+      (?>
+        [^%]*
+        (?> %(?!>) [^%]* )*
+      )
+      (?: %> )?
+    /mx
+
+    NITRO_VALUE_BLOCK = /
+      \#
+      (?:
+        \{
+        [^{}]*
+        (?>
+          \{ [^}]* \}
+          (?> [^{}]* )
+        )*
+        \}?
+      | \| [^|]* \|?
+      | \( [^)]* \)?
+      | \[ [^\]]* \]?
+      | \\ [^\\]* \\?
+      )
+    /x
+
+    NITRO_ENTITY = /
+      % (?: \#\d+ | \w+ ) ;
+    /x
+
+    START_OF_RUBY = /
+      (?=[<\#%])
+      < (?: \?r | % | ruby> )
+      | \# [{(|]
+      | % (?: \#\d+ | \w+ ) ;
+    /x
+
+    CLOSING_PAREN = Hash.new do |h, p|
+      h[p] = p
+    end.update( {
+      '(' => ')',
+      '[' => ']',
+      '{' => '}',
+    } )
+
+    # Closing delimiter for each opener NITRO_RUBY_BLOCK can start with.
+    RUBY_BLOCK_CLOSER = { '<?r' => '?>', '<ruby>' => '</ruby>', '<%' => '%>' }
+
+  private
+    # Both sub-scanners are handed this scanner's @tokens as their token list.
+    def setup
+      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    # Alternates between HTML runs (passed to the HTML scanner) and embedded Ruby or Nitro entities.
+    def scan_tokens tokens, options
+      until eos?
+        if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
+          @html_scanner.tokenize match
+
+        elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
+          start_tag = match[0,2]
+          delimiter = CLOSING_PAREN[start_tag[1,1]]
+          end_tag = (match.size > start_tag.size and match[-1,1] == delimiter) ? delimiter : ''  # never reuse the opener's own char
+          tokens << [:open, :inline]
+          tokens << [start_tag, :delimiter]
+          code = match[start_tag.size .. -1 - end_tag.size]
+          @ruby_scanner.tokenize code
+          tokens << [end_tag, :delimiter] unless end_tag.empty?
+          tokens << [:close, :inline]
+
+        elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
+          start_tag = match[/\A<(?:\?r|ruby>|%)/]  # the opener that actually matched
+          closer = RUBY_BLOCK_CLOSER[start_tag]
+          # The closer must lie entirely after the opener: "<%>" is an unterminated block.
+          terminated = (match.size >= start_tag.size + closer.size and match[-closer.size, closer.size] == closer)
+          end_tag = terminated ? closer : ''
+          tokens << [:open, :inline]
+          tokens << [start_tag, :delimiter]
+          code = match[start_tag.size .. -(end_tag.size)-1]
+          @ruby_scanner.tokenize code
+          tokens << [end_tag, :delimiter] unless end_tag.empty?
+          tokens << [:close, :inline]
+
+        elsif entity = scan(/#{NITRO_ENTITY}/o)
+          tokens << [entity, :entity]
+
+        else
+          raise_inspect 'else-case reached!', tokens
+        end
+      end
+      tokens
+    end
+  end
+end end
diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb
new file mode 100644
index 0000000..77a4366
--- /dev/null
+++ b/lib/coderay/scanners/rhtml.rb
@@ -0,0 +1,63 @@
+module CodeRay module Scanners
+
+  load :html
+  load :ruby
+
+  # RHTML Scanner
+  #
+  # $Id$
+  class RHTML < Scanner
+
+    include Streamable
+    register_for :rhtml
+
+    ERB_RUBY_BLOCK = /
+      <%(?!%)[=-]?
+      (?>
+        [^%]*
+        (?> %(?!>) [^%]* )*
+      )
+      (?: %> )?
+    /x
+
+    START_OF_ERB = /
+      <%(?!%)
+    /x
+
+  private
+
+    # Both sub-scanners are handed this scanner's @tokens as their token list.
+    def setup
+      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    # Alternates between HTML runs (passed to the HTML scanner) and ERB blocks (passed to the Ruby scanner).
+    def scan_tokens tokens, options
+      until eos?
+        if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
+          @html_scanner.tokenize match
+
+        elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
+          start_tag = match[/\A<%[-=]?/]
+          rest = match[start_tag.size .. -1]  # everything after the opener
+          # Only a complete "%>" after the opener closes the block; a lone "%" or ">" is code.
+          end_tag = rest[/%>\z/] || ''
+          tokens << [:open, :inline]
+          tokens << [start_tag, :delimiter]
+          code = rest[0, rest.size - end_tag.size]
+          @ruby_scanner.tokenize code
+          tokens << [end_tag, :delimiter] unless end_tag.empty?
+          tokens << [:close, :inline]
+
+        else
+          raise_inspect 'else-case reached!', tokens
+        end
+      end
+      tokens
+
+    end
+
+  end
+
+end end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 810e1fd..9a33bef 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -36,12 +36,14 @@ module CodeRay module Scanners
depth = nil
states = []
+ c = self.class
+
until eos?
type = :error
match = nil
kind = nil
- if state.instance_of? StringState
+ if state.instance_of? c::StringState
# {{{
match = scan_until(state.pattern) || scan_until(/\z/)
tokens << [match, :content] unless match.empty?
@@ -74,7 +76,7 @@ module CodeRay module Scanners
tokens = saved_tokens
regexp = tokens
for text, type in regexp
- if text.is_a? String
+ if text.is_a? ::String
case type
when :content
text.scan(/([^#]+)|(#.*)/) do |plain, comment|
@@ -141,7 +143,7 @@ module CodeRay module Scanners
state.paren_depth += 1
tokens << [match, :nesting_delimiter]
- when REGEXP_SYMBOLS
+ when /#{REGEXP_SYMBOLS}/ox
tokens << [match, :function]
else
@@ -190,15 +192,15 @@ module CodeRay module Scanners
if last_token_dot
type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
else
- type = IDENT_KIND[match]
+ type = c::IDENT_KIND[match]
if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
type = :constant
elsif type == :reserved
- state = DEF_NEW_STATE[match]
+ state = c::DEF_NEW_STATE[match]
end
end
## experimental!
- fancy_allowed = regexp_allowed = :set if REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
+ fancy_allowed = regexp_allowed = :set if c::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
# OPERATORS #
elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
@@ -226,7 +228,7 @@ module CodeRay module Scanners
elsif match = scan(/ ['"] /mx)
tokens << [:open, :string]
type = :delimiter
- state = StringState.new :string, match == '"', match # important for streaming
+ state = c::StringState.new :string, match == '"', match # important for streaming
elsif match = scan(/#{INSTANCE_VARIABLE}/o)
type = :instance_variable
@@ -235,7 +237,7 @@ module CodeRay module Scanners
tokens << [:open, :regexp]
type = :delimiter
interpreted = true
- state = StringState.new :regexp, interpreted, match
+ state = c::StringState.new :regexp, interpreted, match
if parse_regexp
tokens = []
saved_tokens = tokens
@@ -251,7 +253,7 @@ module CodeRay module Scanners
tokens << [':', :symbol]
match = delim.chr
type = :delimiter
- state = StringState.new :symbol, delim == ?", match
+ state = c::StringState.new :symbol, delim == ?", match
else
type = :symbol
end
@@ -264,11 +266,11 @@ module CodeRay module Scanners
indented = self[1] == '-'
quote = self[3]
delim = self[quote ? 4 : 2]
- type = QUOTE_TO_TYPE[quote]
+ type = c::QUOTE_TO_TYPE[quote]
tokens << [:open, type]
tokens << [match, :delimiter]
match = :close
- heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
+ heredoc = c::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
heredocs ||= [] # create heredocs if empty
heredocs << heredoc
@@ -277,7 +279,7 @@ module CodeRay module Scanners
raise_inspect 'Unknown fancy string: %%%p' % k, tokens
end
tokens << [:open, type]
- state = StringState.new type, interpreted, self[2]
+ state = c::StringState.new type, interpreted, self[2]
type = :delimiter
elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
@@ -293,7 +295,7 @@ module CodeRay module Scanners
else
tokens << [:open, :shell]
type = :delimiter
- state = StringState.new :shell, true, match
+ state = c::StringState.new :shell, true, match
end
elsif match = scan(/#{GLOBAL_VARIABLE}/o)
@@ -326,7 +328,7 @@ module CodeRay module Scanners
tokens << [':', :symbol]
match = delim.chr
type = :delimiter
- state = StringState.new :symbol, delim == ?", match
+ state = c::StringState.new :symbol, delim == ?", match
state.next_state = :undef_comma_expected
else
type = :symbol
@@ -377,6 +379,11 @@ module CodeRay module Scanners
end
end
+ states << state if state.is_a? c::StringState
+ until states.empty?
+ tokens << [:close, states.pop.type]
+ end
+
tokens
end
end
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index c007d8c..7bf9103 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -46,7 +46,7 @@ module CodeRay module Scanners
| <=?>? | >=? # comparison, rocket operator
| ===? # simple equality and case equality
/ox
- METHOD_NAME_EX = / #{IDENT} [?!=]? | #{METHOD_NAME_OPERATOR} /ox
+ METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox