summaryrefslogtreecommitdiff
path: root/lib/coderay
diff options
context:
space:
mode:
authorKornelius Kalnbach <murphy@rubychan.de>2013-06-23 16:06:02 +0200
committerKornelius Kalnbach <murphy@rubychan.de>2013-06-23 16:06:02 +0200
commit0013b649f714f23eef0859921fa7804ca7caef76 (patch)
tree7c278ee7c420729b4738fe2a195e529ffd2bb6da /lib/coderay
parentaddcbd446066d0da1627112814e3ce1b8d404da0 (diff)
parent64ca2ae8ad5130bdcf652aa7aa08298de00f20f4 (diff)
downloadcoderay-0013b649f714f23eef0859921fa7804ca7caef76.tar.gz
Merge branch 'master' into go-scanner
Conflicts: lib/coderay/helpers/file_type.rb
Diffstat (limited to 'lib/coderay')
-rw-r--r--lib/coderay/encoders/debug.rb23
-rw-r--r--lib/coderay/encoders/debug_lint.rb64
-rw-r--r--lib/coderay/encoders/html.rb1
-rw-r--r--lib/coderay/encoders/html/numbering.rb4
-rw-r--r--lib/coderay/encoders/statistic.rb1
-rw-r--r--lib/coderay/encoders/terminal.rb179
-rw-r--r--lib/coderay/helpers/file_type.rb101
-rw-r--r--lib/coderay/helpers/gzip.rb41
-rw-r--r--lib/coderay/scanners/css.rb4
-rw-r--r--lib/coderay/scanners/diff.rb2
-rw-r--r--lib/coderay/scanners/groovy.rb21
-rw-r--r--lib/coderay/scanners/html.rb54
-rw-r--r--lib/coderay/scanners/json.rb25
-rw-r--r--lib/coderay/scanners/lua.rb280
-rw-r--r--lib/coderay/scanners/php.rb26
-rw-r--r--lib/coderay/scanners/python.rb4
-rw-r--r--lib/coderay/scanners/raydebug.rb16
-rw-r--r--lib/coderay/scanners/ruby.rb2
-rw-r--r--lib/coderay/scanners/sass.rb12
-rw-r--r--lib/coderay/scanners/sql.rb9
-rw-r--r--lib/coderay/scanners/yaml.rb4
-rw-r--r--lib/coderay/styles/alpha.rb48
-rwxr-xr-xlib/coderay/token_kinds.rb137
-rw-r--r--lib/coderay/tokens.rb99
24 files changed, 742 insertions, 415 deletions
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index c03d3fb..f4db330 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -9,7 +9,6 @@ module Encoders
#
# You cannot fully restore the tokens information from the
# output, because consecutive :space tokens are merged.
- # Use Tokens#dump for caching purposes.
#
# See also: Scanners::Debug
class Debug < Encoder
@@ -18,38 +17,26 @@ module Encoders
FILE_EXTENSION = 'raydebug'
- def initialize options = {}
- super
- @opened = []
- end
-
def text_token text, kind
- raise 'empty token' if $CODERAY_DEBUG && text.empty?
if kind == :space
@out << text
else
- # TODO: Escape (
- text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/)
- @out << kind.to_s << '(' << text << ')'
+ text = text.gsub('\\', '\\\\\\\\') if text.index('\\')
+ text = text.gsub(')', '\\\\)') if text.index(')')
+ @out << "#{kind}(#{text})"
end
end
def begin_group kind
- @opened << kind
- @out << kind.to_s << '<'
+ @out << "#{kind}<"
end
def end_group kind
- if @opened.last != kind
- puts @out
- raise "we are inside #{@opened.inspect}, not #{kind}"
- end
- @opened.pop
@out << '>'
end
def begin_line kind
- @out << kind.to_s << '['
+ @out << "#{kind}["
end
def end_line kind
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
new file mode 100644
index 0000000..17a0795
--- /dev/null
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -0,0 +1,64 @@
+module CodeRay
+module Encoders
+
+ # = Debug Lint Encoder
+ #
+ # Debug encoder with additional checks for:
+ #
+ # - empty tokens
+ # - incorrect nesting
+ #
+ # It will raise an InvalidTokenStream exception when any of the above occurs.
+ #
+ # See also: Encoders::Debug
+ class DebugLint < Debug
+
+ register_for :debug_lint
+
+ InvalidTokenStream = Class.new StandardError
+ EmptyToken = Class.new InvalidTokenStream
+ IncorrectTokenGroupNesting = Class.new InvalidTokenStream
+
+ def text_token text, kind
+ raise EmptyToken, 'empty token' if text.empty?
+ super
+ end
+
+ def begin_group kind
+ @opened << kind
+ super
+ end
+
+ def end_group kind
+ raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+ @opened.pop
+ super
+ end
+
+ def begin_line kind
+ @opened << kind
+ super
+ end
+
+ def end_line kind
+ raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+ @opened.pop
+ super
+ end
+
+ protected
+
+ def setup options
+ super
+ @opened = []
+ end
+
+ def finish options
+ raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty?
+ super
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index b897f5e..20f2409 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -193,7 +193,6 @@ module Encoders
def finish options
unless @opened.empty?
- warn '%d tokens still open: %p' % [@opened.size, @opened] if $CODERAY_DEBUG
@out << '</span>' while @opened.pop
@last_opened = nil
end
diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb
index 332145b..a1b9c04 100644
--- a/lib/coderay/encoders/html/numbering.rb
+++ b/lib/coderay/encoders/html/numbering.rb
@@ -26,7 +26,7 @@ module Encoders
"<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
end
else
- proc { |line| line.to_s } # :to_s.to_proc in Ruby 1.8.7+
+ :to_s.to_proc
end
bold_every = options[:bold_every]
@@ -75,7 +75,7 @@ module Encoders
line_number = start
output.gsub!(/^.*$\n?/) do |line|
line_number_text = bolding.call line_number
- indent = ' ' * (max_width - line_number.to_s.size) # TODO: Optimize (10^x)
+ indent = ' ' * (max_width - line_number.to_s.size)
line_number += 1
"<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}"
end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 2315d9e..b2f8b83 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -67,7 +67,6 @@ Token Types (%d):
@type_stats['TOTAL'].count += 1
end
- # TODO Hierarchy handling
def begin_group kind
block_token ':begin_group', kind
end
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index 9894b91..c7ae014 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -19,105 +19,135 @@ module CodeRay
register_for :terminal
TOKEN_COLORS = {
- :annotation => "\e[35m",
- :attribute_name => "\e[33m",
+ :debug => "\e[1;37;44m",
+
+ :annotation => "\e[34m",
+ :attribute_name => "\e[35m",
:attribute_value => "\e[31m",
- :binary => "\e[1;35m",
+ :binary => {
+ :self => "\e[31m",
+ :char => "\e[1;31m",
+ :delimiter => "\e[1;31m",
+ },
:char => {
- :self => "\e[36m", :delimiter => "\e[1;34m"
+ :self => "\e[35m",
+ :delimiter => "\e[1;35m"
},
- :class => "\e[1;35m",
+ :class => "\e[1;35;4m",
:class_variable => "\e[36m",
:color => "\e[32m",
- :comment => "\e[37m",
- :complex => "\e[1;34m",
- :constant => "\e[1;34m\e[4m",
- :decoration => "\e[35m",
- :definition => "\e[1;32m",
- :directive => "\e[32m\e[4m",
- :doc => "\e[46m",
- :doctype => "\e[1;30m",
- :docstring => "\e[31m\e[4m",
- :entity => "\e[33m",
- :error => "\e[1;33m\e[41m",
+ :comment => {
+ :self => "\e[1;30m",
+ :char => "\e[37m",
+ :delimiter => "\e[37m",
+ },
+ :constant => "\e[1;34;4m",
+ :decorator => "\e[35m",
+ :definition => "\e[1;33m",
+ :directive => "\e[33m",
+ :docstring => "\e[31m",
+ :doctype => "\e[1;34m",
+ :done => "\e[1;30;2m",
+ :entity => "\e[31m",
+ :error => "\e[1;37;41m",
:exception => "\e[1;31m",
:float => "\e[1;35m",
:function => "\e[1;34m",
- :global_variable => "\e[42m",
+ :global_variable => "\e[1;32m",
:hex => "\e[1;36m",
- :include => "\e[33m",
+ :id => "\e[1;34m",
+ :include => "\e[31m",
:integer => "\e[1;34m",
- :key => "\e[35m",
- :label => "\e[1;15m",
+ :imaginary => "\e[1;34m",
+ :important => "\e[1;31m",
+ :key => {
+ :self => "\e[35m",
+ :char => "\e[1;35m",
+ :delimiter => "\e[1;35m",
+ },
+ :keyword => "\e[32m",
+ :label => "\e[1;33m",
:local_variable => "\e[33m",
- :octal => "\e[1;35m",
- :operator_name => "\e[1;29m",
+ :namespace => "\e[1;35m",
+ :octal => "\e[1;34m",
+ :predefined => "\e[36m",
:predefined_constant => "\e[1;36m",
- :predefined_type => "\e[1;30m",
- :predefined => "\e[4m\e[1;34m",
- :preprocessor => "\e[36m",
+ :predefined_type => "\e[1;32m",
+ :preprocessor => "\e[1;36m",
:pseudo_class => "\e[1;34m",
:regexp => {
- :self => "\e[31m",
- :content => "\e[31m",
- :delimiter => "\e[1;29m",
+ :self => "\e[35m",
+ :delimiter => "\e[1;35m",
:modifier => "\e[35m",
+ :char => "\e[1;35m",
},
- :reserved => "\e[1;31m",
+ :reserved => "\e[32m",
:shell => {
- :self => "\e[42m",
- :content => "\e[1;29m",
- :delimiter => "\e[37m",
+ :self => "\e[33m",
+ :char => "\e[1;33m",
+ :delimiter => "\e[1;33m",
+ :escape => "\e[1;33m",
},
:string => {
- :self => "\e[32m",
- :modifier => "\e[1;32m",
- :escape => "\e[1;36m",
- :delimiter => "\e[1;32m",
- :char => "\e[1;36m",
+ :self => "\e[31m",
+ :modifier => "\e[1;31m",
+ :char => "\e[1;35m",
+ :delimiter => "\e[1;31m",
+ :escape => "\e[1;31m",
+ },
+ :symbol => {
+ :self => "\e[33m",
+ :delimiter => "\e[1;33m",
},
- :symbol => "\e[1;32m",
- :tag => "\e[1;34m",
+ :tag => "\e[32m",
:type => "\e[1;34m",
:value => "\e[36m",
- :variable => "\e[1;34m",
+ :variable => "\e[34m",
- :insert => "\e[42m",
- :delete => "\e[41m",
- :change => "\e[44m",
- :head => "\e[45m"
+ :insert => {
+ :self => "\e[42m",
+ :insert => "\e[1;32;42m",
+ :eyecatcher => "\e[102m",
+ },
+ :delete => {
+ :self => "\e[41m",
+ :delete => "\e[1;31;41m",
+ :eyecatcher => "\e[101m",
+ },
+ :change => {
+ :self => "\e[44m",
+ :change => "\e[37;44m",
+ },
+ :head => {
+ :self => "\e[45m",
+ :filename => "\e[37;45m"
+ },
}
+
TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
- TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
- TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
- TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+ TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
protected
def setup(options)
super
@opened = []
- @subcolors = nil
+ @color_scopes = [TOKEN_COLORS]
end
public
def text_token text, kind
- if color = (@subcolors || TOKEN_COLORS)[kind]
- if Hash === color
- if color[:self]
- color = color[:self]
- else
- @out << text
- return
- end
- end
+ if color = @color_scopes.last[kind]
+ color = color[:self] if color.is_a? Hash
@out << color
- @out << text.gsub("\n", "\e[0m\n" + color)
+ @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text)
@out << "\e[0m"
- @out << @subcolors[:self] if @subcolors
+ if outer_color = @color_scopes.last[:self]
+ @out << outer_color
+ end
else
@out << text
end
@@ -130,40 +160,33 @@ module CodeRay
alias begin_line begin_group
def end_group kind
- if @opened.empty?
- # nothing to close
- else
- @opened.pop
+ if @opened.pop
+ @color_scopes.pop
@out << "\e[0m"
- @out << open_token(@opened.last)
+ if outer_color = @color_scopes.last[:self]
+ @out << outer_color
+ end
end
end
def end_line kind
- if @opened.empty?
- # nothing to close
- else
- @opened.pop
- # whole lines to be highlighted,
- # eg. added/modified/deleted lines in a diff
- @out << (@line_filler ||= "\t" * 100 + "\e[0m")
- @out << open_token(@opened.last)
- end
+ @out << (@line_filler ||= "\t" * 100)
+ end_group kind
end
private
def open_token kind
- if color = TOKEN_COLORS[kind]
- if Hash === color
- @subcolors = color
+ if color = @color_scopes.last[kind]
+ if color.is_a? Hash
+ @color_scopes << color
color[:self]
else
- @subcolors = {}
+ @color_scopes << @color_scopes.last
color
end
else
- @subcolors = nil
+ @color_scopes << @color_scopes.last
''
end
end
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 1a43924..9c36b62 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -77,55 +77,58 @@ module CodeRay
end
TypeFromExt = {
- 'c' => :c,
- 'cfc' => :xml,
- 'cfm' => :xml,
- 'clj' => :clojure,
- 'css' => :css,
- 'diff' => :diff,
- 'dpr' => :delphi,
- 'erb' => :erb,
- 'gemspec' => :ruby,
- 'go' => :go,
- 'groovy' => :groovy,
- 'gvy' => :groovy,
- 'h' => :c,
- 'haml' => :haml,
- 'htm' => :html,
- 'html' => :html,
- 'html.erb' => :erb,
- 'java' => :java,
- 'js' => :java_script,
- 'json' => :json,
- 'mab' => :ruby,
- 'pas' => :delphi,
- 'patch' => :diff,
- 'phtml' => :php,
- 'php' => :php,
- 'php3' => :php,
- 'php4' => :php,
- 'php5' => :php,
- 'prawn' => :ruby,
- 'py' => :python,
- 'py3' => :python,
- 'pyw' => :python,
- 'rake' => :ruby,
- 'raydebug' => :raydebug,
- 'rb' => :ruby,
- 'rbw' => :ruby,
- 'rhtml' => :erb,
- 'rjs' => :ruby,
- 'rpdf' => :ruby,
- 'ru' => :ruby,
- 'rxml' => :ruby,
- 'sass' => :sass,
- 'sql' => :sql,
- 'tmproj' => :xml,
- 'xaml' => :xml,
- 'xhtml' => :html,
- 'xml' => :xml,
- 'yaml' => :yaml,
- 'yml' => :yaml,
+ 'c' => :c,
+ 'cfc' => :xml,
+ 'cfm' => :xml,
+ 'clj' => :clojure,
+ 'css' => :css,
+ 'diff' => :diff,
+ 'dpr' => :delphi,
+ 'erb' => :erb,
+ 'gemspec' => :ruby,
+ 'go' => :go,
+ 'groovy' => :groovy,
+ 'gvy' => :groovy,
+ 'h' => :c,
+ 'haml' => :haml,
+ 'htm' => :html,
+ 'html' => :html,
+ 'html.erb' => :erb,
+ 'java' => :java,
+ 'js' => :java_script,
+ 'json' => :json,
+ 'lua' => :lua,
+ 'mab' => :ruby,
+ 'pas' => :delphi,
+ 'patch' => :diff,
+ 'phtml' => :php,
+ 'php' => :php,
+ 'php3' => :php,
+ 'php4' => :php,
+ 'php5' => :php,
+ 'prawn' => :ruby,
+ 'py' => :python,
+ 'py3' => :python,
+ 'pyw' => :python,
+ 'rake' => :ruby,
+ 'raydebug' => :raydebug,
+ 'rb' => :ruby,
+ 'rbw' => :ruby,
+ 'rhtml' => :erb,
+ 'rjs' => :ruby,
+ 'rpdf' => :ruby,
+ 'ru' => :ruby,
+ 'rxml' => :ruby,
+ 'sass' => :sass,
+ 'sql' => :sql,
+ 'taskpaper' => :taskpaper,
+ 'template' => :json, # AWS CloudFormation template
+ 'tmproj' => :xml,
+ 'xaml' => :xml,
+ 'xhtml' => :html,
+ 'xml' => :xml,
+ 'yaml' => :yaml,
+ 'yml' => :yaml,
}
for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
TypeFromExt[cpp_alias] = :cpp
diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb
deleted file mode 100644
index 245014a..0000000
--- a/lib/coderay/helpers/gzip.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-module CodeRay
-
- # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
- module GZip
-
- require 'zlib'
-
- # The default zipping level. 7 zips good and fast.
- DEFAULT_GZIP_LEVEL = 7
-
- # Unzips the given string +s+.
- #
- # Example:
- # require 'gzip_simple'
- # print GZip.gunzip(File.read('adresses.gz'))
- def GZip.gunzip s
- Zlib::Inflate.inflate s
- end
-
- # Zips the given string +s+.
- #
- # Example:
- # require 'gzip_simple'
- # File.open('adresses.gz', 'w') do |file
- # file.write GZip.gzip('Mum: 0123 456 789', 9)
- # end
- #
- # If you provide a +level+, you can control how strong
- # the string is compressed:
- # - 0: no compression, only convert to gzip format
- # - 1: compress fast
- # - 7: compress more, but still fast (default)
- # - 8: compress more, slower
- # - 9: compress best, very slow
- def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
- Zlib::Deflate.new(level).deflate s, Zlib::FINISH
- end
-
- end
-
-end
diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb
index 732f9c5..9ed4618 100644
--- a/lib/coderay/scanners/css.rb
+++ b/lib/coderay/scanners/css.rb
@@ -145,10 +145,10 @@ module Scanners
start = match[/^\w+\(/]
encoder.text_token start, :delimiter
if match[-1] == ?)
- encoder.text_token match[start.size..-2], :content
+ encoder.text_token match[start.size..-2], :content if match.size > start.size + 1
encoder.text_token ')', :delimiter
else
- encoder.text_token match[start.size..-1], :content
+ encoder.text_token match[start.size..-1], :content if match.size > start.size
end
encoder.end_group :function
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index af0f755..fd1aed6 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -69,7 +69,7 @@ module Scanners
state = :added
elsif match = scan(/\\ .*/)
encoder.text_token match, :comment
- elsif match = scan(/@@(?>[^@\n]*)@@/)
+ elsif match = scan(/@@(?>[^@\n]+)@@/)
content_scanner.state = :initial unless match?(/\n\+/)
content_scanner_entry_state = nil
if check(/\n|$/)
diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb
index cf55daf..c64454f 100644
--- a/lib/coderay/scanners/groovy.rb
+++ b/lib/coderay/scanners/groovy.rb
@@ -36,9 +36,12 @@ module Scanners
protected
+ def setup
+ @state = :initial
+ end
+
def scan_tokens encoder, options
-
- state = :initial
+ state = options[:state] || @state
inline_block_stack = []
inline_block_paren_depth = nil
string_delimiter = nil
@@ -223,7 +226,7 @@ module Scanners
encoder.text_token match, :content # TODO: Shouldn't this be :error?
elsif match = scan(/ \\ | \n /x)
- encoder.end_group state
+ encoder.end_group state == :regexp ? :regexp : :string
encoder.text_token match, :error
after_def = value_expected = false
state = :initial
@@ -243,7 +246,17 @@ module Scanners
end
if [:multiline_string, :string, :regexp].include? state
- encoder.end_group state
+ encoder.end_group state == :regexp ? :regexp : :string
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ until inline_block_stack.empty?
+ state, = *inline_block_stack.pop
+ encoder.end_group :inline
+ encoder.end_group state == :regexp ? :regexp : :string
end
encoder
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 3ba3b79..ebe7b01 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -1,13 +1,13 @@
module CodeRay
module Scanners
-
+
# HTML Scanner
#
# Alias: +xhtml+
#
# See also: Scanners::XML
class HTML < Scanner
-
+
register_for :html
KINDS_NOT_LOC = [
@@ -33,7 +33,8 @@ module Scanners
)
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
- add(EVENT_ATTRIBUTES, :script)
+ add(EVENT_ATTRIBUTES, :script).
+ add(['style'], :style)
ATTR_NAME = /[\w.:-]+/ # :nodoc:
TAG_END = /\/?>/ # :nodoc:
@@ -75,9 +76,14 @@ module Scanners
def scan_java_script encoder, code
if code && !code.empty?
@java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
- # encoder.begin_group :inline
@java_script_scanner.tokenize code, :tokens => encoder
- # encoder.end_group :inline
+ end
+ end
+
+ def scan_css encoder, code, state = [:initial]
+ if code && !code.empty?
+ @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true
+ @css_scanner.tokenize code, :tokens => encoder, :state => state
end
end
@@ -99,7 +105,15 @@ module Scanners
case state
when :initial
- if match = scan(/<!--(?:.*?-->|.*)/m)
+ if match = scan(/<!\[CDATA\[/)
+ encoder.text_token match, :inline_delimiter
+ if match = scan(/.*?\]\]>/m)
+ encoder.text_token match[0..-4], :plain
+ encoder.text_token ']]>', :inline_delimiter
+ elsif match = scan(/.+/)
+ encoder.text_token match, :error
+ end
+ elsif match = scan(/<!--(?:.*?-->|.*)/m)
encoder.text_token match, :comment
elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
encoder.text_token match, :doctype
@@ -110,7 +124,7 @@ module Scanners
elsif match = scan(/<\/[-\w.:]*>?/m)
in_tag = nil
encoder.text_token match, :tag
- elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
+ elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
encoder.text_token match, :tag
in_tag = self[1]
if self[2]
@@ -161,17 +175,21 @@ module Scanners
encoder.text_token match, :attribute_value
state = :attribute
elsif match = scan(/["']/)
- if in_attribute == :script
- encoder.begin_group :inline
- encoder.text_token match, :inline_delimiter
+ if in_attribute == :script || in_attribute == :style
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
if scan(/javascript:[ \t]*/)
encoder.text_token matched, :comment
end
code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
- scan_java_script encoder, code
+ if in_attribute == :script
+ scan_java_script encoder, code
+ else
+ scan_css encoder, code, [:block]
+ end
match = scan(/["']/)
- encoder.text_token match, :inline_delimiter if match
- encoder.end_group :inline
+ encoder.text_token match, :delimiter if match
+ encoder.end_group :string
state = :attribute
in_attribute = nil
else
@@ -206,19 +224,23 @@ module Scanners
when :in_special_tag
case in_tag
- when 'script'
+ when 'script', 'style'
encoder.text_token match, :space if match = scan(/[ \t]*\n/)
if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
code = self[2] || self[4]
closing = self[3]
encoder.text_token self[1], :comment
else
- code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
+ code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
closing = false
end
unless code.empty?
encoder.begin_group :inline
- scan_java_script encoder, code
+ if in_tag == 'script'
+ scan_java_script encoder, code
+ else
+ scan_css encoder, code
+ end
encoder.end_group :inline
end
encoder.text_token closing, :comment if closing
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index 4e0f462..b09970c 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,15 +14,21 @@ module Scanners
ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
protected
+ def setup
+ @state = :initial
+ end
+
# See http://json.org/ for a definition of the JSON lexic/grammar.
def scan_tokens encoder, options
+ state = options[:state] || @state
- state = :initial
- stack = []
- key_expected = false
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
until eos?
@@ -32,18 +38,11 @@ module Scanners
if match = scan(/ \s+ /x)
encoder.text_token match, :space
elsif match = scan(/"/)
- state = key_expected ? :key : :string
+ state = check(/#{KEY}/o) ? :key : :string
encoder.begin_group state
encoder.text_token match, :delimiter
elsif match = scan(/ [:,\[{\]}] /x)
encoder.text_token match, :operator
- case match
- when ':' then key_expected = false
- when ',' then key_expected = true if stack.last == :object
- when '{' then stack << :object; key_expected = true
- when '[' then stack << :array
- when '}', ']' then stack.pop # no error recovery, but works for valid JSON
- end
elsif match = scan(/ true | false | null /x)
encoder.text_token match, :value
elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
@@ -82,6 +81,10 @@ module Scanners
end
end
+ if options[:keep_state]
+ @state = state
+ end
+
if [:string, :key].include? state
encoder.end_group state
end
diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb
new file mode 100644
index 0000000..fb1e45a
--- /dev/null
+++ b/lib/coderay/scanners/lua.rb
@@ -0,0 +1,280 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua < Scanner
+
+ register_for :lua
+ file_extension 'lua'
+ title 'Lua'
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ protected
+
+ # Scanner initialization.
+ def setup
+ @state = :initial
+ @brace_depth = 0
+ end
+
+ # CodeRay entry hook. Starts parsing.
+ def scan_tokens(encoder, options)
+ state = options[:state] || @state
+ brace_depth = @brace_depth
+ num_equals = nil
+
+ until eos?
+ case state
+
+ when :initial
+ if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]]
+ num_equals = match.count("=") # Number must match for comment end
+ encoder.begin_group(:comment)
+ encoder.text_token(match, :delimiter)
+ state = :long_comment
+
+ elsif match = scan(/--.*$/) # --Lua comment
+ encoder.text_token(match, :comment)
+
+ elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
+ num_equals = match.count("=") # Number must match for string end
+ encoder.begin_group(:string)
+ encoder.text_token(match, :delimiter)
+ state = :long_string
+
+ elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label::
+ encoder.text_token(match, :label)
+
+ elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua
+ encoder.text_token(match, :predefined)
+
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
+ kind = IDENT_KIND[match]
+
+ # Extra highlighting for entities following certain keywords
+ if kind == :keyword and match == "function"
+ state = :function_expected
+ elsif kind == :keyword and match == "goto"
+ state = :goto_label_expected
+ elsif kind == :keyword and match == "local"
+ state = :local_var_expected
+ end
+
+ encoder.text_token(match, kind)
+
+ elsif match = scan(/\{/) # Opening table brace {
+ encoder.begin_group(:map)
+ encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter)
+ brace_depth += 1
+ state = :map
+
+ elsif match = scan(/\}/) # Closing table brace }
+ if brace_depth == 1
+ brace_depth = 0
+ encoder.text_token(match, :delimiter)
+ encoder.end_group(:map)
+ elsif brace_depth == 0 # Mismatched brace
+ encoder.text_token(match, :error)
+ else
+ brace_depth -= 1
+ encoder.text_token(match, :inline_delimiter)
+ encoder.end_group(:map)
+ state = :map
+ end
+
+ elsif match = scan(/["']/) # String delimiters " and '
+ encoder.begin_group(:string)
+ encoder.text_token(match, :delimiter)
+ start_delim = match
+ state = :string
+
+ # ↓Prefix hex number ←|→ decimal number
+ elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+ encoder.text_token(match, :float)
+
+ # ↓Prefix hex number ←|→ decimal number
+ elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+ encoder.text_token(match, :integer)
+
+ elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
+ encoder.text_token(match, :operator)
+
+ elsif match = scan(/\s+/) # Space
+ encoder.text_token(match, :space)
+
+ else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
+ encoder.text_token(getch, :error)
+ end
+
+ # It may be that we’re scanning a full-blown subexpression of a table
+ # (tables can contain full expressions in parts).
+ # If this is the case, return to :map scanning state.
+ state = :map if state == :initial && brace_depth >= 1
+
+ when :function_expected
+ if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
+ encoder.text_token(match, :operator)
+ state = :initial
+ elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+ encoder.text_token(match, :ident)
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
+ encoder.text_token(match, :function)
+ state = :initial
+ elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ state = :initial
+ end
+
+ when :goto_label_expected
+ if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+ encoder.text_token(match, :label)
+ state = :initial
+ elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :local_var_expected
+ if match = scan(/function/) # local function ...
+ encoder.text_token(match, :keyword)
+ state = :function_expected
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+ encoder.text_token(match, :local_variable)
+ elsif match = scan(/,/)
+ encoder.text_token(match, :operator)
+ elsif match = scan(/\=/)
+ encoder.text_token(match, :operator)
+ # After encountering the equal sign, arbitrary expressions are
+ # allowed again, so just return to the main state for further
+ # parsing.
+ state = :initial
+ elsif match = scan(/\n/)
+ encoder.text_token(match, :space)
+ state = :initial
+ elsif match = scan(/\s+/)
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :long_comment
+ if match = scan(/.*?(?=\]={#{num_equals}}\])/m)
+ encoder.text_token(match, :content)
+
+ delim = scan(/\]={#{num_equals}}\]/)
+ encoder.text_token(delim, :delimiter)
+ else # No terminator found till EOF
+ encoder.text_token(rest, :error)
+ terminate
+ end
+ encoder.end_group(:comment)
+ state = :initial
+
+ when :long_string
+ if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences
+ encoder.text_token(match, :content)
+
+ delim = scan(/\]={#{num_equals}}\]/)
+ encoder.text_token(delim, :delimiter)
+ else # No terminator found till EOF
+ encoder.text_token(rest, :error)
+ terminate
+ end
+ encoder.end_group(:string)
+ state = :initial
+
+ when :string
+ if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ encoder.text_token(match, :content)
+ elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
+ encoder.text_token(match, :char)
+ elsif match = scan(Regexp.compile(start_delim))
+ encoder.text_token(match, :delimiter)
+ encoder.end_group(:string)
+ state = :initial
+ elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
+ encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ encoder.end_group(:string)
+ state = :initial
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :map
+ if match = scan(/[,;]/)
+ encoder.text_token(match, :operator)
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
+ encoder.text_token(match, :key)
+ encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
+ encoder.text_token(scan(/\=/), :operator)
+ state = :initial
+ elsif match = scan(/\s+/m)
+ encoder.text_token(match, :space)
+ else
+ # Note this clause doesn’t advance the scan pointer, it’s a kind of
+ # "retry with other options" (the :initial state then of course
+ # advances the pointer).
+ state = :initial
+ end
+ else
+ raise
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ encoder.end_group :string if [:string].include? state
+ brace_depth.times { encoder.end_group :map }
+
+ encoder
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index 6c68834..7a8d75d 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -265,7 +265,7 @@ module Scanners
@html_scanner.tokenize match unless match.empty?
end
- when :php
+ when :php, :php_inline
if match = scan(/\s+/)
encoder.text_token match, :space
@@ -332,7 +332,7 @@ module Scanners
if states.size == 1
encoder.text_token match, :error
else
- states.pop
+ state = states.pop
if states.last.is_a?(::Array)
delimiter = states.last[1]
states[-1] = states.last[0]
@@ -340,6 +340,7 @@ module Scanners
encoder.end_group :inline
else
encoder.text_token match, :operator
+ encoder.end_group :inline if state == :php_inline
label_expected = true
end
end
@@ -350,7 +351,14 @@ module Scanners
elsif match = scan(RE::PHP_END)
encoder.text_token match, :inline_delimiter
- states = [:initial]
+ while state = states.pop
+ encoder.end_group :string if [:sqstring, :dqstring].include? state
+ if state.is_a? Array
+ encoder.end_group :inline
+ encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+ end
+ end
+ states << :initial
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
encoder.begin_group :string
@@ -400,6 +408,7 @@ module Scanners
elsif match = scan(/\\/)
encoder.text_token match, :error
else
+ encoder.end_group :string
states.pop
end
@@ -459,7 +468,7 @@ module Scanners
encoder.begin_group :inline
states[-1] = [states.last, delimiter]
delimiter = nil
- states.push :php
+ states.push :php_inline
encoder.text_token match, :delimiter
else
encoder.text_token match, :content
@@ -469,6 +478,7 @@ module Scanners
elsif match = scan(/\$/)
encoder.text_token match, :content
else
+ encoder.end_group :string
states.pop
end
@@ -500,6 +510,14 @@ module Scanners
end
+ while state = states.pop
+ encoder.end_group :string if [:sqstring, :dqstring].include? state
+ if state.is_a? Array
+ encoder.end_group :inline
+ encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+ end
+ end
+
encoder
end
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index a9492ab..09c8b6e 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -157,12 +157,12 @@ module Scanners
encoder.text_token match, :operator
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
+ modifiers = self[1]
string_delimiter = self[2]
- string_type = docstring_coming ? :docstring : :string
+ string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string)
docstring_coming = false if docstring_coming
encoder.begin_group string_type
string_raw = false
- modifiers = self[1]
unless modifiers.empty?
string_raw = !!modifiers.index(?r)
encoder.text_token modifiers, :modifier
diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb
index 7a21354..d39d962 100644
--- a/lib/coderay/scanners/raydebug.rb
+++ b/lib/coderay/scanners/raydebug.rb
@@ -1,11 +1,11 @@
module CodeRay
module Scanners
-
+
# = Debug Scanner
#
# Parses the output of the Encoders::Debug encoder.
class Raydebug < Scanner
-
+
register_for :raydebug
file_extension 'raydebug'
title 'CodeRay Token Dump'
@@ -13,11 +13,11 @@ module Scanners
protected
def scan_tokens encoder, options
-
+
opened_tokens = []
-
+
until eos?
-
+
if match = scan(/\s+/)
encoder.text_token match, :space
@@ -26,7 +26,7 @@ module Scanners
encoder.text_token kind, :class
encoder.text_token '(', :operator
match = self[2]
- encoder.text_token match, kind.to_sym
+ encoder.text_token match, kind.to_sym unless match.empty?
encoder.text_token match, :operator if match = scan(/\)/)
elsif match = scan(/ (\w+) ([<\[]) /x)
@@ -59,8 +59,8 @@ module Scanners
encoder
end
-
+
end
-
+
end
end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index c282f31..80165ca 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -269,7 +269,7 @@ module Scanners
end
if last_state
- state = last_state
+ state = last_state unless state.is_a?(StringState) # otherwise, a simple 'def"' results in unclosed tokens
last_state = nil
end
diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb
index 167051d..e20bebe 100644
--- a/lib/coderay/scanners/sass.rb
+++ b/lib/coderay/scanners/sass.rb
@@ -176,7 +176,7 @@ module Scanners
encoder.text_token match[start.size..-2], :content
encoder.text_token ')', :delimiter
else
- encoder.text_token match[start.size..-1], :content
+ encoder.text_token match[start.size..-1], :content if start.size < match.size
end
encoder.end_group :function
@@ -195,7 +195,7 @@ module Scanners
elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
encoder.text_token match, :color
- elsif match = scan(/@else if\b|#{RE::AtKeyword}/)
+ elsif match = scan(/@else if\b|#{RE::AtKeyword}/o)
encoder.text_token match, :directive
value_expected = true
@@ -218,6 +218,14 @@ module Scanners
@state = states
end
+ while state = states.pop
+ if state == :sass_inline
+ encoder.end_group :inline
+ elsif state == :string
+ encoder.end_group :string
+ end
+ end
+
encoder
end
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
index b757278..93aeaf3 100644
--- a/lib/coderay/scanners/sql.rb
+++ b/lib/coderay/scanners/sql.rb
@@ -1,8 +1,9 @@
-module CodeRay module Scanners
+module CodeRay
+module Scanners
# by Josh Goebel
class SQL < Scanner
-
+
register_for :sql
KEYWORDS = %w(
@@ -149,6 +150,7 @@ module CodeRay module Scanners
string_content = ''
end
encoder.text_token match, :error unless match.empty?
+ encoder.end_group :string
state = :initial
else
raise "else case \" reached; %p not handled." % peek(1), encoder
@@ -171,4 +173,5 @@ module CodeRay module Scanners
end
-end end \ No newline at end of file
+end
+end
diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb
index 96f4e93..32c8e2c 100644
--- a/lib/coderay/scanners/yaml.rb
+++ b/lib/coderay/scanners/yaml.rb
@@ -47,7 +47,7 @@ module Scanners
when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
encoder.begin_group :string
encoder.text_token match, :delimiter
- encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
+ encoder.text_token match, :content if (match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)) && !match.empty?
encoder.text_token match, :delimiter if match = scan(/"/)
encoder.end_group :string
next
@@ -84,7 +84,7 @@ module Scanners
when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/)
encoder.begin_group :key
encoder.text_token match[0,1], :delimiter
- encoder.text_token match[1..-2], :content
+ encoder.text_token match[1..-2], :content if match.size > 2
encoder.text_token match[-1,1], :delimiter
encoder.end_group :key
key_indent = column(pos - match.size) - 1
diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb
index f57e4a1..ff85ecc 100644
--- a/lib/coderay/styles/alpha.rb
+++ b/lib/coderay/styles/alpha.rb
@@ -3,14 +3,14 @@ module Styles
# A colorful theme using CSS 3 colors (with alpha channel).
class Alpha < Style
-
+
register_for :alpha
-
+
code_background = 'hsl(0,0%,95%)'
numbers_background = 'hsl(180,65%,90%)'
border_color = 'silver'
normal_color = 'black'
-
+
CSS_MAIN_STYLES = <<-MAIN # :nodoc:
.CodeRay {
background-color: #{code_background};
@@ -56,25 +56,26 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
.annotation { color:#007 }
.attribute-name { color:#b48 }
.attribute-value { color:#700 }
-.binary { color:#509 }
+.binary { color:#549 }
+.binary .char { color:#325 }
+.binary .delimiter { color:#325 }
+.char { color:#D20 }
.char .content { color:#D20 }
.char .delimiter { color:#710 }
-.char { color:#D20 }
.class { color:#B06; font-weight:bold }
.class-variable { color:#369 }
.color { color:#0A0 }
.comment { color:#777 }
.comment .char { color:#444 }
.comment .delimiter { color:#444 }
-.complex { color:#A08 }
.constant { color:#036; font-weight:bold }
.decorator { color:#B0B }
.definition { color:#099; font-weight:bold }
.delimiter { color:black }
.directive { color:#088; font-weight:bold }
-.doc { color:#970 }
-.doc-string { color:#D42; font-weight:bold }
+.docstring { color:#D42; }
.doctype { color:#34b }
+.done { text-decoration: line-through; color: gray }
.entity { color:#800; font-weight:bold }
.error { color:#F00; background-color:#FAA }
.escape { color:#666 }
@@ -85,19 +86,22 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
.global-variable { color:#d70 }
.hex { color:#02b }
.id { color:#33D; font-weight:bold }
-.imaginary { color:#f00 }
.include { color:#B44; font-weight:bold }
.inline { background-color: hsla(0,0%,0%,0.07); color: black }
.inline-delimiter { font-weight: bold; color: #666 }
.instance-variable { color:#33B }
.integer { color:#00D }
+.imaginary { color:#f00 }
.important { color:#D00 }
+.key { color: #606 }
.key .char { color: #60f }
.key .delimiter { color: #404 }
-.key { color: #606 }
.keyword { color:#080; font-weight:bold }
.label { color:#970; font-weight:bold }
-.local-variable { color:#963 }
+.local-variable { color:#950 }
+.map .content { color:#808 }
+.map .delimiter { color:#40A}
+.map { background-color:hsla(200,100%,50%,0.06); }
.namespace { color:#707; font-weight:bold }
.octal { color:#40E }
.operator { }
@@ -106,30 +110,30 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
.predefined-type { color:#0a5; font-weight:bold }
.preprocessor { color:#579 }
.pseudo-class { color:#00C; font-weight:bold }
+.regexp { background-color:hsla(300,100%,50%,0.06); }
.regexp .content { color:#808 }
.regexp .delimiter { color:#404 }
.regexp .modifier { color:#C2C }
-.regexp { background-color:hsla(300,100%,50%,0.06); }
.reserved { color:#080; font-weight:bold }
+.shell { background-color:hsla(120,100%,50%,0.06); }
.shell .content { color:#2B2 }
.shell .delimiter { color:#161 }
-.shell { background-color:hsla(120,100%,50%,0.06); }
+.string { background-color:hsla(0,100%,50%,0.05); }
.string .char { color: #b0b }
.string .content { color: #D20 }
.string .delimiter { color: #710 }
.string .modifier { color: #E40 }
-.string { background-color:hsla(0,100%,50%,0.05); }
+.symbol { color:#A60 }
.symbol .content { color:#A60 }
.symbol .delimiter { color:#630 }
-.symbol { color:#A60 }
-.tag { color:#070 }
+.tag { color:#070; font-weight:bold }
.type { color:#339; font-weight:bold }
-.value { color: #088; }
-.variable { color:#037 }
+.value { color: #088 }
+.variable { color:#037 }
.insert { background: hsla(120,100%,50%,0.12) }
.delete { background: hsla(0,100%,50%,0.12) }
-.change { color: #bbf; background: #007; }
+.change { color: #bbf; background: #007 }
.head { color: #f8f; background: #505 }
.head .filename { color: white; }
@@ -140,11 +144,9 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
.delete .delete { color: #c00; background:transparent; font-weight:bold }
.change .change { color: #88f }
.head .head { color: #f4f }
-
-.done { text-decoration: line-through; color: gray }
TOKENS
-
+
end
-
+
end
end
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index de3a0d0..9137a49 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -10,79 +10,78 @@ module CodeRay
TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity
TokenKinds.update( # :nodoc:
- :annotation => 'annotation',
- :attribute_name => 'attribute-name',
- :attribute_value => 'attribute-value',
- :binary => 'bin',
- :char => 'char',
- :class => 'class',
- :class_variable => 'class-variable',
- :color => 'color',
- :comment => 'comment',
- :complex => 'complex',
- :constant => 'constant',
- :content => 'content',
- :debug => 'debug',
- :decorator => 'decorator',
- :definition => 'definition',
- :delimiter => 'delimiter',
- :directive => 'directive',
- :doc => 'doc',
- :doctype => 'doctype',
- :docstring => 'doc-string',
- :done => 'done',
- :entity => 'entity',
- :error => 'error',
- :escape => 'escape',
- :exception => 'exception',
- :filename => 'filename',
- :float => 'float',
- :function => 'function',
- :global_variable => 'global-variable',
- :hex => 'hex',
- :id => 'id',
- :imaginary => 'imaginary',
- :important => 'important',
- :include => 'include',
- :inline => 'inline',
- :inline_delimiter => 'inline-delimiter',
- :instance_variable => 'instance-variable',
- :integer => 'integer',
- :key => 'key',
- :keyword => 'keyword',
- :label => 'label',
- :local_variable => 'local-variable',
- :modifier => 'modifier',
- :namespace => 'namespace',
- :octal => 'octal',
- :predefined => 'predefined',
- :predefined_constant => 'predefined-constant',
- :predefined_type => 'predefined-type',
- :preprocessor => 'preprocessor',
- :pseudo_class => 'pseudo-class',
- :regexp => 'regexp',
- :reserved => 'reserved',
- :shell => 'shell',
- :string => 'string',
- :symbol => 'symbol',
- :tag => 'tag',
- :type => 'type',
- :value => 'value',
- :variable => 'variable',
+ :debug => 'debug', # highlight for debugging (white on blue background)
- :change => 'change',
- :delete => 'delete',
- :head => 'head',
- :insert => 'insert',
+ :annotation => 'annotation', # Groovy, Java
+ :attribute_name => 'attribute-name', # HTML, CSS
+ :attribute_value => 'attribute-value', # HTML
+ :binary => 'binary', # Python, Ruby
+ :char => 'char', # most scanners, also inside of strings
+ :class => 'class', # lots of scanners, for different purposes also in CSS
+ :class_variable => 'class-variable', # Ruby, YAML
+ :color => 'color', # CSS
+ :comment => 'comment', # most scanners
+ :constant => 'constant', # PHP, Ruby
+ :content => 'content', # inside of strings, most scanners
+ :decorator => 'decorator', # Python
+ :definition => 'definition', # CSS
+ :delimiter => 'delimiter', # inside strings, comments and other types
+ :directive => 'directive', # lots of scanners
+ :doctype => 'doctype', # Groovy, HTML, Ruby, YAML
+ :docstring => 'docstring', # Python
+ :done => 'done', # Taskpaper
+ :entity => 'entity', # HTML
+ :error => 'error', # invalid token, most scanners
+ :escape => 'escape', # Ruby (string inline variables like #$foo, #@bar)
+ :exception => 'exception', # Java, PHP, Python
+ :filename => 'filename', # Diff
+ :float => 'float', # most scanners
+ :function => 'function', # CSS, JavaScript, PHP
+ :global_variable => 'global-variable', # Ruby, YAML
+ :hex => 'hex', # hexadecimal number; lots of scanners
+ :id => 'id', # CSS
+ :imaginary => 'imaginary', # Python
+ :important => 'important', # CSS, Taskpaper
+ :include => 'include', # C, Groovy, Java, Python, Sass
+ :inline => 'inline', # nested code, eg. inline string evaluation; lots of scanners
+ :inline_delimiter => 'inline-delimiter', # used instead of :inline > :delimiter FIXME: Why use inline_delimiter?
+ :instance_variable => 'instance-variable', # Ruby
+ :integer => 'integer', # most scanners
+ :key => 'key', # lots of scanners, used together with :value
+ :keyword => 'keyword', # reserved word that's actually implemented; most scanners
+ :label => 'label', # C, PHP
+ :local_variable => 'local-variable', # local and magic variables; some scanners
+ :map => 'map', # Lua tables
+ :modifier => 'modifier', # used inside of strings; lots of scanners
+ :namespace => 'namespace', # Clojure, Java, Taskpaper
+ :octal => 'octal', # lots of scanners
+ :predefined => 'predefined', # predefined function: lots of scanners
+ :predefined_constant => 'predefined-constant',# lots of scanners
+ :predefined_type => 'predefined-type', # C, Java, PHP
+ :preprocessor => 'preprocessor', # C, Delphi, HTML
+ :pseudo_class => 'pseudo-class', # CSS
+ :regexp => 'regexp', # Groovy, JavaScript, Ruby
+ :reserved => 'reserved', # most scanners
+ :shell => 'shell', # Ruby
+ :string => 'string', # most scanners
+ :symbol => 'symbol', # Clojure, Ruby, YAML
+ :tag => 'tag', # CSS, HTML
+ :type => 'type', # CSS, Java, SQL, YAML
+ :value => 'value', # used together with :key; CSS, JSON, YAML
+ :variable => 'variable', # Sass, SQL, YAML
- :eyecatcher => 'eyecatcher',
+ :change => 'change', # Diff
+ :delete => 'delete', # Diff
+ :head => 'head', # Diff, YAML
+ :insert => 'insert', # Diff
+ :eyecatcher => 'eyecatcher', # Diff
- :ident => false,
- :operator => false,
+ :ident => false, # almost all scanners
+ :operator => false, # almost all scanners
- :space => false,
- :plain => false
+ :space => false, # almost all scanners
+ :plain => false # almost all scanners
)
- TokenKinds[:method] = TokenKinds[:function]
+ TokenKinds[:method] = TokenKinds[:function]
end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 6957d69..e7bffce 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,55 +1,43 @@
module CodeRay
- # GZip library for writing and reading token dumps.
- autoload :GZip, coderay_path('helpers', 'gzip')
-
- # = Tokens TODO: Rewrite!
- #
- # The Tokens class represents a list of tokens returnd from
- # a Scanner.
+ # The Tokens class represents a list of tokens returned from
+ # a Scanner. It's actually just an Array with a few helper methods.
#
- # A token is not a special object, just a two-element Array
- # consisting of
+ # A token itself is not a special object, just two elements in an Array:
# * the _token_ _text_ (the original source of the token in a String) or
# a _token_ _action_ (begin_group, end_group, begin_line, end_line)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
- # A token looks like this:
+ # It looks like this:
#
- # ['# It looks like this', :comment]
- # ['3.1415926', :float]
- # ['$^', :error]
+ # ..., '# It looks like this', :comment, ...
+ # ..., '3.1415926', :float, ...
+ # ..., '$^', :error, ...
#
# Some scanners also yield sub-tokens, represented by special
- # token actions, namely begin_group and end_group.
+ # token actions, for example :begin_group and :end_group.
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
- # [:begin_group, :string],
- # ['"', :delimiter],
- # ['a string', :content],
- # ['"', :delimiter],
- # [:end_group, :string]
+ # :begin_group, :string,
+ # '"', :delimiter,
+ # 'a string', :content,
+ # '"', :delimiter,
+ # :end_group, :string
# ]
#
- # Tokens is the interface between Scanners and Encoders:
- # The input is split and saved into a Tokens object. The Encoder
- # then builds the output from this object.
- #
- # Thus, the syntax below becomes clear:
+ # Tokens can be used to save the output of a Scanner in a simple
+ # Ruby object that can be sent to an Encoder later:
#
- # CodeRay.scan('price = 2.59', :ruby).html
- # # the Tokens object is here -------^
- #
- # See how small it is? ;)
+ # tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+ # tokens.encode(:html)
+ # tokens.html
+ # CodeRay.encoder(:html).encode_tokens(tokens)
#
# Tokens gives you the power to handle pre-scanned code very easily:
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
- # that you put in your DB.
- #
- # It also allows you to generate tokens directly (without using a scanner),
- # to load them from a file, and still use any Encoder that CodeRay provides.
+ # You can serialize it to a JSON string and store it in a database, pass it
+ # around to encode it more than once, send it to other algorithms...
class Tokens < Array
# The Scanner instance that created the tokens.
@@ -58,8 +46,7 @@ module CodeRay
# Encode the tokens using encoder.
#
# encoder can be
- # * a symbol like :html oder :statistic
- # * an Encoder class
+ # * a plugin name like :html or 'statistic'
# * an Encoder object
#
# options are passed to the encoder.
@@ -157,53 +144,11 @@ module CodeRay
parts
end
- # Dumps the object into a String that can be saved
- # in files or databases.
- #
- # The dump is created with Marshal.dump;
- # In addition, it is gzipped using GZip.gzip.
- #
- # The returned String object includes Undumping
- # so it has an #undump method. See Tokens.load.
- #
- # You can configure the level of compression,
- # but the default value 7 should be what you want
- # in most cases as it is a good compromise between
- # speed and compression rate.
- #
- # See GZip module.
- def dump gzip_level = 7
- dump = Marshal.dump self
- dump = GZip.gzip dump, gzip_level
- dump.extend Undumping
- end
-
# Return the actual number of tokens.
def count
size / 2
end
- # Include this module to give an object an #undump
- # method.
- #
- # The string returned by Tokens.dump includes Undumping.
- module Undumping
- # Calls Tokens.load with itself.
- def undump
- Tokens.load self
- end
- end
-
- # Undump the object using Marshal.load, then
- # unzip it using GZip.gunzip.
- #
- # The result is commonly a Tokens object, but
- # this is not guaranteed.
- def Tokens.load dump
- dump = GZip.gunzip dump
- @dump = Marshal.load dump
- end
-
alias text_token push
def begin_group kind; push :begin_group, kind end
def end_group kind; push :end_group, kind end