diff options
Diffstat (limited to 'lib/coderay')
-rw-r--r-- | lib/coderay/scanners/_map.rb | 4 | ||||
-rw-r--r-- | lib/coderay/scanners/c.rb | 10 | ||||
-rw-r--r-- | lib/coderay/scanners/html.rb | 148 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 4 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 9 |
5 files changed, 164 insertions, 11 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 811546b..fc53d91 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -4,7 +4,9 @@ module CodeRay map :cpp => :c,
:plain => :plaintext,
:pascal => :delphi,
- :irb => :ruby
+ :irb => :ruby,
+ :xml => :html,
+ :xhtml => :html
end
end
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 5764254..ae7ef83 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -42,7 +42,9 @@ module CodeRay module Scanners kind = :error
match = nil
- if state == :initial
+ case state
+
+ when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
@@ -96,7 +98,7 @@ module CodeRay module Scanners getch
end
- elsif state == :string
+ when :string
if scan(/[^\\"]+/)
kind = :content
elsif scan(/"/)
@@ -113,7 +115,7 @@ module CodeRay module Scanners raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
- elsif state == :include_expected
+ when :include_expected
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
kind = :include
state = :initial
@@ -128,7 +130,7 @@ module CodeRay module Scanners end
else
- raise_inspect 'else-case reached', tokens
+ raise_inspect 'Unknown state', tokens
end
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb new file mode 100644 index 0000000..62da13b --- /dev/null +++ b/lib/coderay/scanners/html.rb @@ -0,0 +1,148 @@ +#require 'coderay/common_patterns'
+
+module CodeRay module Scanners
+
+  # HTML Scanner
+  #
+  # Splits markup into :space, :comment, :preprocessor, :tag, :plain,
+  # :char, :attribute_name, :operator, :attribute_value, :delimiter,
+  # :content and :error tokens. Double-quoted attribute values are
+  # wrapped in [:open, :string] / [:close, :string] marker tokens.
+  #
+  # The scanner is a small state machine:
+  #
+  # :initial::                outside of any tag
+  # :attribute::              inside a tag, expecting a name or the tag end
+  # :attribute_equal::        after an attribute name, expecting "="
+  # :attribute_value::        after "=", expecting a value
+  # :attribute_value_string:: inside a double-quoted value
+  class HTML < Scanner
+
+    include Streamable
+    register_for :html
+
+    ATTR_NAME = /[\w.:-]+/
+    # Unquoted attribute values accept the same characters as names.
+    ATTR_VALUE_UNQUOTED = ATTR_NAME
+    # End of a tag, optionally preceded by a slash ("/>").
+    TAG_END = /\/?>/
+    HEX = /[0-9a-fA-F]/
+    # Named (&amp;), decimal (&#38;) or hexadecimal (&#x26;) reference.
+    ENTITY = /
+      &
+      (?:
+        \w+
+      |
+        \#
+        (?:
+          \d+
+        |
+          x#{HEX}+
+        )
+      )
+      ;
+    /ox
+
+    private
+    # Scans the whole input and appends [text, kind] pairs to +tokens+.
+    #
+    # tokens::  receiver of the tokens; only <tt><<</tt> is called on it
+    # options:: not used by this scanner
+    #
+    # Returns +tokens+. Unexpected characters inside a tag are emitted as
+    # :error tokens; when $DEBUG is set, any :error token is reported
+    # through raise_inspect instead.
+    def scan_tokens tokens, options
+
+      state = :initial
+
+      until eos?
+
+        kind = :error
+        match = nil
+
+        # Whitespace is tokenized the same way in every state.
+        if scan(/\s+/m)
+          kind = :space
+
+        else
+
+          case state
+
+          when :initial
+            if scan(/<!--.*?-->/m)
+              kind = :comment
+            elsif scan(/<!DOCTYPE.*?>/m)
+              kind = :preprocessor
+            elsif scan(/<\?xml.*?\?>/m)
+              kind = :preprocessor
+            elsif scan(/<\?.*?\?>|<%.*?%>/m)
+              kind = :comment
+            elsif scan(/<\/[-\w_.:]*>/m)
+              kind = :tag
+            elsif match = scan(/<[-\w_.:]*/m)
+              kind = :tag
+              if match?(/>/)
+                # Attribute-less tag: fold the ">" into the same token.
+                match << getch
+              else
+                state = :attribute
+              end
+            elsif scan(/[^<>&]+/)
+              kind = :plain
+            elsif scan(ENTITY)
+              kind = :char
+            elsif scan(/>/)
+              kind = :error
+            else
+              # Reached by a "&" that does not start an ENTITY.
+              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
+            end
+
+          when :attribute
+            if scan(TAG_END)
+              kind = :tag
+              state = :initial
+            elsif scan(ATTR_NAME)
+              kind = :attribute_name
+              state = :attribute_equal
+            else
+              # Unexpected character: consume it as an :error token so the
+              # scanner always makes progress.
+              match = getch
+            end
+
+          when :attribute_equal
+            if scan(/=/)
+              kind = :operator
+              state = :attribute_value
+            elsif scan(ATTR_NAME)
+              kind = :attribute_name
+            elsif scan(TAG_END)
+              kind = :tag
+              state = :initial
+            elsif scan(/./)
+              # Anything else is an :error token; look for a name again.
+              state = :attribute
+            end
+
+          when :attribute_value
+            if scan(ATTR_VALUE_UNQUOTED)
+              kind = :attribute_value
+              state = :attribute
+            elsif scan(/"/)
+              tokens << [:open, :string]
+              state = :attribute_value_string
+              kind = :delimiter
+            elsif scan(TAG_END)
+              kind = :tag
+              state = :initial
+            else
+              # Unsupported value start (for example a single quote):
+              # consume one character as an :error token.
+              match = getch
+            end
+
+          when :attribute_value_string
+            if scan(/[^"&\n]+/)
+              kind = :content
+            elsif scan(/"/)
+              tokens << ['"', :delimiter]
+              tokens << [:close, :string]
+              state = :attribute
+              # Both tokens are already pushed; skip the shared push below.
+              next
+            elsif scan(ENTITY)
+              kind = :char
+            elsif match?(/\n/)
+              # Unterminated string: close it without consuming the newline.
+              # NOTE(review): the whitespace scan at the top of the loop
+              # consumes a newline before this state is consulted, so this
+              # branch looks unreachable as written - confirm.
+              tokens << [:close, :string]
+              state = :attribute
+              next
+            else
+              # A "&" that does not start an ENTITY: emit it as :error.
+              match = getch
+            end
+
+          else
+            raise_inspect 'Unknown state: %p' % [state], tokens
+
+          end
+
+        end
+
+        # Branches that did not capture the text themselves rely on the
+        # scanner's last match.
+        match ||= matched
+        if $DEBUG and (not kind or kind == :error)
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens
+        end
+        raise_inspect 'Empty token', tokens unless match
+
+        tokens << [match, kind]
+      end
+
+      tokens
+    end
+
+  end
+
+end end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index dd92caf..810e1fd 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -272,7 +272,7 @@ module CodeRay module Scanners heredocs ||= [] # create heredocs if empty
heredocs << heredoc
- elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
+ elsif fancy_allowed and match = scan(/#{FANCY_START_SAVE}/o)
type, interpreted = *FancyStringType.fetch(self[1]) do
raise_inspect 'Unknown fancy string: %%%p' % k, tokens
end
@@ -358,6 +358,7 @@ module CodeRay module Scanners end
end
+# }}}
regexp_allowed = regexp_allowed == :set
fancy_allowed = fancy_allowed == :set
@@ -373,7 +374,6 @@ module CodeRay module Scanners state = last_state
last_state = nil
end
-# }}}
end
end
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index d75a17a..c007d8c 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -130,16 +130,17 @@ module CodeRay module Scanners RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
- FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox
+ # FIXME: excluding \s and = is only a workaround; they are still
+ # allowed as delimiters.
+ FANCY_START_SAVE = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mx
+ FANCY_START_CORRECT = / % ( [qQwWxsr] | (?!\w) ) (.) /mx
FancyStringType = {
'q' => [:string, false],
'Q' => [:string, true],
'r' => [:regexp, true],
's' => [:symbol, false],
- 'x' => [:shell, true],
- 'w' => [:string, :word],
- 'W' => [:string, :word],
+ 'x' => [:shell, true]
}
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
|