diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/coderay/encoders/html.rb | 494 | ||||
-rw-r--r-- | lib/coderay/helpers/plugin.rb | 18 | ||||
-rw-r--r-- | lib/coderay/scanner.rb | 476 | ||||
-rw-r--r-- | lib/coderay/scanners/_map.rb | 29 | ||||
-rw-r--r-- | lib/coderay/scanners/c.rb | 318 | ||||
-rw-r--r-- | lib/coderay/scanners/delphi.rb | 260 | ||||
-rw-r--r-- | lib/coderay/scanners/html.rb | 341 | ||||
-rw-r--r-- | lib/coderay/scanners/nitro_xhtml.rb (renamed from lib/coderay/scanners/nitro_html.rb) | 255 | ||||
-rw-r--r-- | lib/coderay/scanners/rhtml.rb | 138 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 804 |
10 files changed, 1586 insertions, 1547 deletions
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 60c56c1..bd7583a 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -1,245 +1,249 @@ -module CodeRay
-module Encoders
-
- # = HTML Encoder
- #
- # This is CodeRay's most important highlighter:
- # It provides save, fast XHTML generation and CSS support.
- #
- # == Usage
- #
- # require 'coderay'
- # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page
- # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> <span class="CodeRay"><span class="co">Some</span> /code/</span>
- # puts CodeRay.scan('Some /code/', :ruby).span #-> the same
- #
- # puts CodeRay.scan('Some code', :ruby).html(
- # :wrap => nil,
- # :line_numbers => :inline,
- # :css => :style
- # )
- # #-> <span class="no">1</span> <span style="color:#036; font-weight:bold;">Some</span> code
- #
- # == Options
- #
- # === :tab_width
- # Convert \t characters to +n+ spaces (a number.)
- # Default: 8
- #
- # === :css
- # How to include the styles; can be :class or :style.
- #
- # Default: :class
- #
- # === :wrap
- # Wrap in :page, :div, :span or nil.
- #
- # You can also use Encoders::Div and Encoders::Span.
- #
- # Default: nil
- #
- # === :line_numbers
- # Include line numbers in :table, :inline, :list or nil (no line numbers)
- #
- # Default: nil
- #
- # === :line_number_start
- # Where to start with line number counting.
- #
- # Default: 1
- #
- # === :bold_every
- # Make every +n+-th number appear bold.
- #
- # Default: 10
- #
- # === :hint
- # Include some information into the output using the title attribute.
- # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect).
- #
- # Default: false
- class HTML < Encoder
-
- include Streamable
- register_for :html
-
- FILE_EXTENSION = 'html'
-
- DEFAULT_OPTIONS = {
- :tab_width => 8,
-
- :level => :xhtml,
- :css => :class,
-
- :style => :cycnus,
-
- :wrap => nil,
-
- :line_numbers => nil,
- :line_number_start => 1,
- :bold_every => 10,
-
- :hint => false,
- }
-
- helper :classes, :output, :css
-
- attr_reader :css
-
- protected
-
- HTML_ESCAPE = { #:nodoc:
- '&' => '&',
- '"' => '"',
- '>' => '>',
- '<' => '<',
- }
-
- # This was to prevent illegal HTML.
- # Strange chars should still be avoided in codes.
- evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
- evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
- #ansi_chars = Array(0x7f..0xff)
- #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
- # \x9 (\t) and \xA (\n) not included
- #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
- HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
-
- TOKEN_KIND_TO_INFO = Hash.new { |h, kind|
- h[kind] =
- case kind
- when :pre_constant
- 'Predefined constant'
- else
- kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
- end
- }
-
- # Generate a hint about the given +classes+ in a +hint+ style.
- #
- # +hint+ may be :info, :info_long or :debug.
- def self.token_path_to_hint hint, classes
- return '' unless hint
- title =
- case hint
- when :info
- TOKEN_KIND_TO_INFO[classes.first]
- when :info_long
- classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
- when :debug
- classes.inspect
- end
- " title=\"#{title}\""
- end
-
- def setup options
- super
-
- @HTML_ESCAPE = HTML_ESCAPE.dup
- @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
-
- @opened = [nil]
- @css = CSS.new options[:style]
-
- hint = options[:hint]
- if hint and not [:debug, :info, :info_long].include? hint
- raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint
- end
-
- case options[:css]
-
- when :class
- @css_style = Hash.new do |h, k|
- if k.is_a? Array
- type = k.first
- else
- type = k
- end
- c = ClassOfKind[type]
- if c == :NO_HIGHLIGHT and not hint
- h[k] = false
- else
- title = HTML.token_path_to_hint hint, (k[1..-1] << k.first)
- h[k] = '<span%s class="%s">' % [title, c]
- end
- end
-
- when :style
- @css_style = Hash.new do |h, k|
- if k.is_a? Array
- styles = k.dup
- else
- styles = [k]
- end
- type = styles.first
- classes = styles.map { |c| ClassOfKind[c] }
- if classes.first == :NO_HIGHLIGHT and not hint
- h[k] = false
- else
- styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first
- title = HTML.token_path_to_hint hint, styles
- classes.delete 'il'
- style = @css[*classes]
- h[k] =
- if style
- '<span%s style="%s">' % [title, style]
- else
- false
- end
- end
- end
-
- else
- raise ArgumentError, "Unknown value %p for :css." % options[:css]
-
- end
- end
-
- def finish options
- not_needed = @opened.shift
- @out << '</span>' * @opened.size
- warn '%d tokens still open' % @opened.size unless @opened.empty?
-
- @out.extend Output
- @out.css = @css
- @out.numerize! options[:line_numbers], options
- @out.wrap! options[:wrap]
-
- super
- end
-
- def token text, type
- if text.is_a? ::String
- if text =~ /#{HTML_ESCAPE_PATTERN}/o
- text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
- end
- @opened[0] = type
- if style = @css_style[@opened]
- @out << style << text << '</span>'
- else
- @out << text
- end
- else
- case text
- when :open
- @opened[0] = type
- @out << (@css_style[@opened] || '<span>')
- @opened << type
- when :close
- unless @opened.empty?
- raise 'Malformed token stream: Trying to close a token that was never opened.' unless @opened.size > 1
- @out << '</span>'
- @opened.pop
- end
- when nil
- raise 'Token with nil as text was given: %p' % [[text, type]]
- else
- raise 'unknown token kind: %p' % text
- end
- end
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # = HTML Encoder + # + # This is CodeRay's most important highlighter: + # It provides save, fast XHTML generation and CSS support. + # + # == Usage + # + # require 'coderay' + # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page + # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> <span class="CodeRay"><span class="co">Some</span> /code/</span> + # puts CodeRay.scan('Some /code/', :ruby).span #-> the same + # + # puts CodeRay.scan('Some code', :ruby).html( + # :wrap => nil, + # :line_numbers => :inline, + # :css => :style + # ) + # #-> <span class="no">1</span> <span style="color:#036; font-weight:bold;">Some</span> code + # + # == Options + # + # === :tab_width + # Convert \t characters to +n+ spaces (a number.) + # Default: 8 + # + # === :css + # How to include the styles; can be :class or :style. + # + # Default: :class + # + # === :wrap + # Wrap in :page, :div, :span or nil. + # + # You can also use Encoders::Div and Encoders::Span. + # + # Default: nil + # + # === :line_numbers + # Include line numbers in :table, :inline, :list or nil (no line numbers) + # + # Default: nil + # + # === :line_number_start + # Where to start with line number counting. + # + # Default: 1 + # + # === :bold_every + # Make every +n+-th number appear bold. + # + # Default: 10 + # + # === :hint + # Include some information into the output using the title attribute. + # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect). + # + # Default: false + class HTML < Encoder + + include Streamable + register_for :html + + FILE_EXTENSION = 'html' + + DEFAULT_OPTIONS = { + :tab_width => 8, + + :level => :xhtml, + :css => :class, + + :style => :cycnus, + + :wrap => nil, + + :line_numbers => nil, + :line_number_start => 1, + :bold_every => 10, + + :hint => false, + } + + helper :classes, :output, :css + + attr_reader :css + + protected + + HTML_ESCAPE = { #:nodoc: + '&' => '&', + '"' => '"', + '>' => '>', + '<' => '<', + } + + # This was to prevent illegal HTML. + # Strange chars should still be avoided in codes. + evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s] + evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' } + #ansi_chars = Array(0x7f..0xff) + #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i } + # \x9 (\t) and \xA (\n) not included + #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/ + HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/ + + TOKEN_KIND_TO_INFO = Hash.new { |h, kind| + h[kind] = + case kind + when :pre_constant + 'Predefined constant' + else + kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } + end + } + + # Generate a hint about the given +classes+ in a +hint+ style. + # + # +hint+ may be :info, :info_long or :debug. + def self.token_path_to_hint hint, classes + return '' unless hint + title = + case hint + when :info + TOKEN_KIND_TO_INFO[classes.first] + when :info_long + classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') + when :debug + classes.inspect + end + " title=\"#{title}\"" + end + + def setup options + super + + @HTML_ESCAPE = HTML_ESCAPE.dup + @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] + + @opened = [nil] + @css = CSS.new options[:style] + + hint = options[:hint] + if hint and not [:debug, :info, :info_long].include? hint + raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint + end + + case options[:css] + + when :class + @css_style = Hash.new do |h, k| + if k.is_a? Array + type = k.first + else + type = k + end + c = ClassOfKind[type] + if c == :NO_HIGHLIGHT and not hint + h[k] = false + else + title = HTML.token_path_to_hint hint, (k[1..-1] << k.first) + h[k] = '<span%s class="%s">' % [title, c] + end + end + + when :style + @css_style = Hash.new do |h, k| + if k.is_a? Array + styles = k.dup + else + styles = [k] + end + type = styles.first + classes = styles.map { |c| ClassOfKind[c] } + if classes.first == :NO_HIGHLIGHT and not hint + h[k] = false + else + styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first + title = HTML.token_path_to_hint hint, styles + classes.delete 'il' + style = @css[*classes] + h[k] = + if style + '<span%s style="%s">' % [title, style] + else + false + end + end + end + + else + raise ArgumentError, "Unknown value %p for :css." % options[:css] + + end + end + + def finish options + not_needed = @opened.shift + @out << '</span>' * @opened.size + warn '%d tokens still open: %p' % [@opened.size, @opened] unless @opened.empty? + + @out.extend Output + @out.css = @css + @out.numerize! options[:line_numbers], options + @out.wrap! options[:wrap] + + super + end + + def token text, type + if text.is_a? ::String + if text =~ /#{HTML_ESCAPE_PATTERN}/o + text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } + end + @opened[0] = type + if style = @css_style[@opened] + @out << style << text << '</span>' + else + @out << text + end + else + case text + when :open + @opened[0] = type + @out << (@css_style[@opened] || '<span>') + @opened << type + when :close + if @opened.empty? + # nothing to close + else + if @opened.size == 1 or @opened.last != type + raise 'Malformed token stream: Trying to close a token (%p) that is not open. Open are: %p.' % [type, @opened[1..-1]] if $DEBUG + end + @out << '</span>' + @opened.pop + end + when nil + raise 'Token with nil as text was given: %p' % [[text, type]] + else + raise 'unknown token kind: %p' % text + end + end + end + + end + +end +end diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index 7e90279..742717d 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -168,6 +168,15 @@ module PluginHost end
end
+ # Makes a map of all loaded plugins.
+ def inspect
+ map = plugin_hash.dup
+ map.each do |id, plugin|
+ map[id] = plugin.to_s[/(?>[\w_]+)$/]
+ end
+ "#{name}[#{host_id}]#{map.inspect}"
+ end
+
protected
# Created a new plugin list and stores it to @plugin_hash.
def create_plugin_hash
@@ -194,15 +203,6 @@ protected end
end
- # Makes a map of all loaded scanners.
- def inspect
- map = plugin_hash.dup
- map.each do |id, plugin|
- map[id] = plugin.name[/(?>[\w_]+)$/]
- end
- map.inspect
- end
-
# Loads the map file (see map).
#
# This is done automatically when plugin_path is called.
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb index 6d20211..83f73f2 100644 --- a/lib/coderay/scanner.rb +++ b/lib/coderay/scanner.rb @@ -1,238 +1,238 @@ -module CodeRay
-
- require 'coderay/helpers/plugin'
-
- # = Scanners
- #
- # $Id$
- #
- # This module holds the Scanner class and its subclasses.
- # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
- # can be found in coderay/scanners/ruby.
- #
- # Scanner also provides methods and constants for the register
- # mechanism and the [] method that returns the Scanner class
- # belonging to the given lang.
- #
- # See PluginHost.
- module Scanners
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'scanners'
-
- require 'strscan'
-
- # = Scanner
- #
- # The base class for all Scanners.
- #
- # It is a subclass of Ruby's great +StringScanner+, which
- # makes it easy to access the scanning methods inside.
- #
- # It is also +Enumerable+, so you can use it like an Array of
- # Tokens:
- #
- # require 'coderay'
- #
- # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
- #
- # for text, kind in c_scanner
- # puts text if kind == :operator
- # end
- #
- # # prints: (*==)++;
- #
- # OK, this is a very simple example :)
- # You can also use +map+, +any?+, +find+ and even +sort_by+,
- # if you want.
- class Scanner < StringScanner
- extend Plugin
- plugin_host Scanners
-
- # Raised if a Scanner fails while scanning
- ScanError = Class.new(Exception)
-
- require 'coderay/helpers/word_list'
-
- # The default options for all scanner classes.
- #
- # Define @default_options for subclasses.
- DEFAULT_OPTIONS = { :stream => false }
-
- class << self
-
- # Returns if the Scanner can be used in streaming mode.
- def streamable?
- is_a? Streamable
- end
-
- def normify code
- code = code.to_s.to_unix
- end
-
- end
-
-=begin
-## Excluded for speed reasons; protected seems to make methods slow.
-
- # Save the StringScanner methods from being called.
- # This would not be useful for highlighting.
- strscan_public_methods =
- StringScanner.instance_methods -
- StringScanner.ancestors[1].instance_methods
- protected(*strscan_public_methods)
-=end
-
- # Create a new Scanner.
- #
- # * +code+ is the input String and is handled by the superclass
- # StringScanner.
- # * +options+ is a Hash with Symbols as keys.
- # It is merged with the default options of the class (you can
- # overwrite default options here.)
- # * +block+ is the callback for streamed highlighting.
- #
- # If you set :stream to +true+ in the options, the Scanner uses a
- # TokenStream with the +block+ as callback to handle the tokens.
- #
- # Else, a Tokens object is used.
- def initialize code='', options = {}, &block
- @options = self.class::DEFAULT_OPTIONS.merge options
- raise "I am only the basic Scanner class. I can't scan "\
- "anything. :( Use my subclasses." if self.class == Scanner
-
- super Scanner.normify(code)
-
- @tokens = options[:tokens]
- if @options[:stream]
- warn "warning in CodeRay::Scanner.new: :stream is set, "\
- "but no block was given" unless block_given?
- raise NotStreamableError, self unless kind_of? Streamable
- @tokens ||= TokenStream.new(&block)
- else
- warn "warning in CodeRay::Scanner.new: Block given, "\
- "but :stream is #{@options[:stream]}" if block_given?
- @tokens ||= Tokens.new
- end
-
- setup
- end
-
- # More mnemonic accessor name for the input string.
- alias code string
-
- def reset
- super
- reset_instance
- end
-
- def string= code
- code = Scanner.normify(code)
- super code
- reset_instance
- end
-
- # Scans the code and returns all tokens in a Tokens object.
- def tokenize new_string=nil, options = {}
- options = @options.merge(options)
- self.string = new_string if new_string
- @cached_tokens =
- if @options[:stream] # :stream must have been set already
- reset unless new_string
- scan_tokens @tokens, options
- @tokens
- else
- scan_tokens @tokens, options
- end
- end
-
- def tokens
- @cached_tokens ||= tokenize
- end
-
- # Traverses the tokens.
- def each &block
- raise ArgumentError,
- 'Cannot traverse TokenStream.' if @options[:stream]
- tokens.each(&block)
- end
- include Enumerable
-
- # The current line position of the scanner.
- #
- # Beware, this is implemented inefficiently. It should be used
- # for debugging only.
- def line
- string[0..pos].count("\n") + 1
- end
-
- protected
-
- # Can be implemented by subclasses to do some initialization
- # that has to be done once per instance.
- #
- # Use reset for initialization that has to be done once per
- # scan.
- def setup
- end
-
- # This is the central method, and commonly the only one a
- # subclass implements.
- #
- # Subclasses must implement this method; it must return +tokens+
- # and must only use Tokens#<< for storing scanned tokens!
- def scan_tokens tokens, options
- raise NotImplementedError,
- "#{self.class}#scan_tokens not implemented."
- end
-
- def reset_instance
- @tokens.clear unless @options[:keep_tokens]
- @cached_tokens = nil
- end
-
- # Scanner error with additional status information
- def raise_inspect msg, tokens, ambit = 30
- raise ScanError, <<-EOE % [
-
-
-***ERROR in %s: %s
-
-tokens:
-%s
-
-current line: %d pos = %d
-matched: %p
-bol? = %p, eos? = %p
-
-surrounding code:
-%p ~~ %p
-
-
-***ERROR***
-
- EOE
- File.basename(caller[0]),
- msg,
- tokens.last(10).map { |t| t.inspect }.join("\n"),
- line, pos,
- matched, bol?, eos?,
- string[pos-ambit,ambit],
- string[pos,ambit],
- ]
- end
-
- end
-
- end
-end
-
-class String
- # I love this hack. It seems to silence all dos/unix/mac newline problems.
- def to_unix
- if index ?\r
- gsub(/\r\n?/, "\n")
- else
- self
- end
- end
-end
+module CodeRay + + require 'coderay/helpers/plugin' + + # = Scanners + # + # $Id$ + # + # This module holds the Scanner class and its subclasses. + # For example, the Ruby scanner is named CodeRay::Scanners::Ruby + # can be found in coderay/scanners/ruby. + # + # Scanner also provides methods and constants for the register + # mechanism and the [] method that returns the Scanner class + # belonging to the given lang. + # + # See PluginHost. + module Scanners + extend PluginHost + plugin_path File.dirname(__FILE__), 'scanners' + + require 'strscan' + + # = Scanner + # + # The base class for all Scanners. + # + # It is a subclass of Ruby's great +StringScanner+, which + # makes it easy to access the scanning methods inside. + # + # It is also +Enumerable+, so you can use it like an Array of + # Tokens: + # + # require 'coderay' + # + # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" + # + # for text, kind in c_scanner + # puts text if kind == :operator + # end + # + # # prints: (*==)++; + # + # OK, this is a very simple example :) + # You can also use +map+, +any?+, +find+ and even +sort_by+, + # if you want. + class Scanner < StringScanner + extend Plugin + plugin_host Scanners + + # Raised if a Scanner fails while scanning + ScanError = Class.new(Exception) + + require 'coderay/helpers/word_list' + + # The default options for all scanner classes. + # + # Define @default_options for subclasses. + DEFAULT_OPTIONS = { :stream => false } + + class << self + + # Returns if the Scanner can be used in streaming mode. + def streamable? + is_a? Streamable + end + + def normify code + code = code.to_s.to_unix + end + + end + +=begin +## Excluded for speed reasons; protected seems to make methods slow. + + # Save the StringScanner methods from being called. + # This would not be useful for highlighting. + strscan_public_methods = + StringScanner.instance_methods - + StringScanner.ancestors[1].instance_methods + protected(*strscan_public_methods) +=end + + # Create a new Scanner. + # + # * +code+ is the input String and is handled by the superclass + # StringScanner. + # * +options+ is a Hash with Symbols as keys. + # It is merged with the default options of the class (you can + # overwrite default options here.) + # * +block+ is the callback for streamed highlighting. + # + # If you set :stream to +true+ in the options, the Scanner uses a + # TokenStream with the +block+ as callback to handle the tokens. + # + # Else, a Tokens object is used. + def initialize code='', options = {}, &block + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Scanner class. I can't scan "\ + "anything. :( Use my subclasses." if self.class == Scanner + + super Scanner.normify(code) + + @tokens = options[:tokens] + if @options[:stream] + warn "warning in CodeRay::Scanner.new: :stream is set, "\ + "but no block was given" unless block_given? + raise NotStreamableError, self unless kind_of? Streamable + @tokens ||= TokenStream.new(&block) + else + warn "warning in CodeRay::Scanner.new: Block given, "\ + "but :stream is #{@options[:stream]}" if block_given? + @tokens ||= Tokens.new + end + + setup + end + + # More mnemonic accessor name for the input string. + alias code string + + def reset + super + reset_instance + end + + def string= code + code = Scanner.normify(code) + super code + reset_instance + end + + # Scans the code and returns all tokens in a Tokens object. + def tokenize new_string=nil, options = {} + options = @options.merge(options) + self.string = new_string if new_string + @cached_tokens = + if @options[:stream] # :stream must have been set already + reset unless new_string + scan_tokens @tokens, options + @tokens + else + scan_tokens @tokens, options + end + end + + def tokens + @cached_tokens ||= tokenize + end + + # Traverses the tokens. + def each &block + raise ArgumentError, + 'Cannot traverse TokenStream.' if @options[:stream] + tokens.each(&block) + end + include Enumerable + + # The current line position of the scanner. + # + # Beware, this is implemented inefficiently. It should be used + # for debugging only. + def line + string[0..pos].count("\n") + 1 + end + + protected + + # Can be implemented by subclasses to do some initialization + # that has to be done once per instance. + # + # Use reset for initialization that has to be done once per + # scan. + def setup + end + + # This is the central method, and commonly the only one a + # subclass implements. + # + # Subclasses must implement this method; it must return +tokens+ + # and must only use Tokens#<< for storing scanned tokens! + def scan_tokens tokens, options + raise NotImplementedError, + "#{self.class}#scan_tokens not implemented." + end + + def reset_instance + @tokens.clear unless @options[:keep_tokens] + @cached_tokens = nil + end + + # Scanner error with additional status information + def raise_inspect msg, tokens, state = nil, ambit = 30 + raise ScanError, <<-EOE % [ + + +***ERROR in %s: %s + +tokens: +%s + +current line: %d pos = %d +matched: %p state: %p +bol? = %p, eos? = %p + +surrounding code: +%p ~~ %p + + +***ERROR*** + + EOE + File.basename(caller[0]), + msg, + tokens.last(10).map { |t| t.inspect }.join("\n"), + line, pos, + matched, state, bol?, eos?, + string[pos-ambit,ambit], + string[pos,ambit], + ] + end + + end + + end +end + +class String + # I love this hack. It seems to silence all dos/unix/mac newline problems. + def to_unix + if index ?\r + gsub(/\r\n?/, "\n") + else + self + end + end +end diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 6268a6c..1c5fc89 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,15 @@ -module CodeRay
-module Scanners
-
- map :cpp => :c,
- :plain => :plaintext,
- :pascal => :delphi,
- :irb => :ruby,
- :xml => :html,
- :xhtml => :nitro_html
-
- default :plain
-
-end
-end
+module CodeRay +module Scanners + + map :cpp => :c, + :plain => :plaintext, + :pascal => :delphi, + :irb => :ruby, + :xml => :html, + :xhtml => :nitro_xhtml, + :nitro => :nitro_xhtml + + default :plain + +end +end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 66b8de1..be113d0 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,163 @@ -module CodeRay
-module Scanners
-
- class C < Scanner
-
- register_for :c
-
- RESERVED_WORDS = [
- 'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
- 'for', 'goto', 'if', 'return', 'switch', 'while',
- 'struct', 'union', 'enum', 'typedef',
- 'static', 'register', 'auto', 'extern',
- 'sizeof',
- 'volatile', 'const', # C89
- 'inline', 'restrict', # C99
- ]
-
- PREDEFINED_TYPES = [
- 'int', 'long', 'short', 'char', 'void',
- 'signed', 'unsigned', 'float', 'double',
- 'bool', 'complex', # C99
- ]
-
- PREDEFINED_CONSTANTS = [
- 'EOF', 'NULL',
- 'true', 'false', # C99
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_TYPES, :pre_type).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
-
- def scan_tokens tokens, options
-
- state = :initial
-
- until eos?
-
- kind = :error
- match = nil
-
- case state
-
- when :initial
-
- if scan(/ \s+ | \\\n /x)
- kind = :space
-
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- kind = :comment
-
- elsif match = scan(/ \# \s* if \s* 0 /x)
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
- kind = :comment
-
- elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
- kind = :operator
-
- elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = IDENT_KIND[match]
- if kind == :ident and check(/:(?!:)/)
- match << scan(/:/)
- kind = :label
- end
-
- elsif match = scan(/L?"/)
- tokens << [:open, :string]
- if match[0] == ?L
- tokens << ['L', :modifier]
- match = '"'
- end
- state = :string
- kind = :delimiter
-
- elsif scan(/#\s*(\w*)/)
- kind = :preprocessor # FIXME multiline preprocs
- state = :include_expected if self[1] == 'include'
-
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
- kind = :char
-
- elsif scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
-
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
- kind = :oct
-
- elsif scan(/(?:\d+)(?![.eEfF])/)
- kind = :integer
-
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
- kind = :float
-
- else
- getch
- end
-
- when :string
- if scan(/[^\\"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/ \\ | $ /x)
- kind = :error
- state = :initial
- else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
- end
-
- when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
- state = :initial
-
- elsif match = scan(/\s+/)
- kind = :space
- state = :initial if match.index ?\n
-
- else
- getch
-
- end
-
- else
- raise_inspect 'Unknown state', tokens
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
- end
-
- tokens
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index d9d9e1d..c141874 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,131 @@ -module CodeRay
-module Scanners
-
- class Delphi < Scanner
-
- register_for :delphi
-
- RESERVED_WORDS = [
- 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
- 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
- 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
- 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
- 'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
- 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
- 'procedure', 'program', 'property', 'raise', 'record', 'repeat',
- 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
- 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
- 'xor', 'on'
- ]
-
- DIRECTIVES = [
- 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
- 'contains', 'deprecated', 'dispid', 'dynamic', 'export',
- 'external', 'far', 'forward', 'implements', 'local',
- 'near', 'nodefault', 'on', 'overload', 'override',
- 'package', 'pascal', 'platform', 'private', 'protected', 'public',
- 'published', 'read', 'readonly', 'register', 'reintroduce',
- 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
- 'virtual', 'write', 'writeonly'
- ]
-
- IDENT_KIND = CaseIgnoringWordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(DIRECTIVES, :directive)
-
- def scan_tokens tokens, options
-
- state = :initial
-
- until eos?
-
- kind = :error
- match = nil
-
- if state == :initial
-
- if scan(/ \s+ /x)
- kind = :space
-
- elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
- kind = :preprocessor
-
- elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
- kind = :comment
-
- elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
- kind = :operator
-
- elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = IDENT_KIND[match]
-
- elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
- tokens << [:open, :char]
- tokens << ["'", :delimiter]
- tokens << [self[1], :content]
- tokens << ["'", :delimiter]
- tokens << [:close, :char]
- next
-
- elsif match = scan(/ ' /x)
- tokens << [:open, :string]
- state = :string
- kind = :delimiter
-
- elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
- kind = :char
-
- elsif scan(/ \$ [0-9A-Fa-f]+ /x)
- kind = :hex
-
- elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
- kind = :integer
-
- elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
- kind = :float
-
- else
- getch
- end
-
- elsif state == :string
- if scan(/[^\n']+/)
- kind = :content
- elsif scan(/''/)
- kind = :char
- elsif scan(/'/)
- tokens << ["'", :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif scan(/\n/)
- state = :initial
- else
- raise "else case \' reached; %p not handled." % peek(1), tokens
- end
-
- else
- raise 'else-case reached', tokens
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
- end
-
- tokens
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + class Delphi < Scanner + + register_for :delphi + + RESERVED_WORDS = [ + 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', + 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', + 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', + 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', + 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', + 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', + 'procedure', 'program', 'property', 'raise', 'record', 'repeat', + 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', + 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', + 'xor', 'on' + ] + + DIRECTIVES = [ + 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', + 'contains', 'deprecated', 'dispid', 'dynamic', 'export', + 'external', 'far', 'forward', 'implements', 'local', + 'near', 'nodefault', 'on', 'overload', 'override', + 'package', 'pascal', 'platform', 'private', 'protected', 'public', + 'published', 'read', 'readonly', 'register', 'reintroduce', + 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', + 'virtual', 'write', 'writeonly' + ] + + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(DIRECTIVES, :directive) + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + if state == :initial + + if scan(/ \s+ /x) + kind = :space + + elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + kind = :preprocessor + + elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + kind = :comment + + elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + + elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) + tokens << [:open, :char] + tokens << ["'", :delimiter] + tokens << [self[1], :content] + tokens << ["'", :delimiter] + tokens << [:close, :char] + next + + elsif match = scan(/ ' /x) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + kind = :char + + elsif scan(/ \$ [0-9A-Fa-f]+ /x) + kind = :hex + + elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + kind = :integer + + elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + kind = :float + + else + kind = :error + getch + + end + + elsif state == :string + if scan(/[^\n']+/) + kind = :content + elsif scan(/''/) + kind = :char + elsif scan(/'/) + tokens << ["'", :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/\n/) + state = :initial + else + raise "else case \' reached; %p not handled." % peek(1), tokens + end + + else + raise 'else-case reached', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 7cdc07e..181e5d3 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,174 @@ -module CodeRay
-module Scanners
-
- # HTML Scanner
- #
- # $Id$
- class HTML < Scanner
-
- include Streamable
- register_for :html
-
- ATTR_NAME = /[\w.:-]+/
- ATTR_VALUE_UNQUOTED = ATTR_NAME
- TAG_END = /\/?>/
- HEX = /[0-9a-fA-F]/
- ENTITY = /
- &
- (?:
- \w+
- |
- \#
- (?:
- \d+
- |
- x#{HEX}+
- )
- )
- ;
- /ox
-
- PLAIN_STRING_CONTENT = {
- "'" => /[^&'>\n]+/,
- '"' => /[^&">\n]+/,
- }
-
- private
- def setup
- @state = :initial
- @plain_string_content = nil
- end
-
- def scan_tokens tokens, options
-
- state = @state
- plain_string_content = @plain_string_content
-
- until eos?
-
- kind = :error
- match = nil
-
- if scan(/\s+/m)
- kind = :space
-
- else
-
- case state
-
- when :initial
- if scan(/<!--.*?-->/m)
- kind = :comment
- elsif scan(/<!DOCTYPE.*?>/m)
- kind = :preprocessor
- elsif scan(/<\?xml.*?\?>/m)
- kind = :preprocessor
- elsif scan(/<\?.*?\?>|<%.*?%>/m)
- kind = :comment
- elsif scan(/<\/[-\w_.:]*>/m)
- kind = :tag
- elsif match = scan(/<[-\w_.:]*>?/m)
- kind = :tag
- state = :attribute unless match[-1] == ?>
- elsif scan(/[^<>&]+/)
- kind = :plain
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/[>&]/)
- kind = :error
- else
- raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
- end
-
- when :attribute
- if scan(/#{TAG_END}/)
- kind = :tag
- state = :initial
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
- state = :attribute_equal
- else
- getch
- end
-
- when :attribute_equal
- if scan(/=/)
- kind = :operator
- state = :attribute_value
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
- elsif scan(/#{TAG_END}/o)
- kind = :tag
- state = :initial
- elsif scan(/./)
- state = :attribute
- end
-
- when :attribute_value
- if scan(/#{ATTR_VALUE_UNQUOTED}/o)
- kind = :attribute_value
- state = :attribute
- elsif match = scan(/["']/)
- tokens << [:open, :string]
- state = :attribute_value_string
- plain_string_content = PLAIN_STRING_CONTENT[match]
- kind = :delimiter
- elsif scan(/#{TAG_END}/o)
- kind = :tag
- state = :initial
- else
- getch
- end
-
- when :attribute_value_string
- if scan(plain_string_content)
- kind = :content
- elsif scan(/['"]/)
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
- state = :attribute
- next
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/[\n>]/)
- tokens << [:close, :string]
- kind = :error
- state = :initial
- end
-
- else
- raise_inspect 'Unknown state: %p' % [state], tokens
-
- end
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
- end
-
- if options[:keep_state]
- @state = state
- @plain_string_content = plain_string_content
- end
-
- tokens
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + # HTML Scanner + # + # $Id$ + class HTML < Scanner + + include Streamable + register_for :html + + ATTR_NAME = /[\w.:-]+/ + ATTR_VALUE_UNQUOTED = ATTR_NAME + TAG_END = /\/?>/ + HEX = /[0-9a-fA-F]/ + ENTITY = / + & + (?: + \w+ + | + \# + (?: + \d+ + | + x#{HEX}+ + ) + ) + ; + /ox + + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + + def reset + super + @state = :initial + end + + private + def setup + @state = :initial + @plain_string_content = nil + end + + def scan_tokens tokens, options + + state = @state + plain_string_content = @plain_string_content + + until eos? + + kind = nil + match = nil + + if scan(/\s+/m) + kind = :space + + else + + case state + + when :initial + if scan(/<!--.*?-->/m) + kind = :comment + elsif scan(/<!DOCTYPE.*?>/m) + kind = :preprocessor + elsif scan(/<\?xml.*?\?>/m) + kind = :preprocessor + elsif scan(/<\?.*?\?>|<%.*?%>/m) + kind = :comment + elsif scan(/<\/[-\w_.:]*>/m) + kind = :tag + elsif match = scan(/<[-\w_.:]+>?/m) + kind = :tag + state = :attribute unless match[-1] == ?> + elsif scan(/[^<>&]+/) + kind = :plain + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[<>&]/) + kind = :error + else + raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + end + + when :attribute + if scan(/#{TAG_END}/) + kind = :tag + state = :initial + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + state = :attribute_equal + else + kind = :error + getch + end + + when :attribute_equal + if scan(/=/) + kind = :operator + state = :attribute_value + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + elsif scan(/./) + state = :attribute + end + + when :attribute_value + if scan(/#{ATTR_VALUE_UNQUOTED}/o) + kind = :attribute_value + state = :attribute + elsif match = scan(/["']/) + tokens << [:open, :string] + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + kind = :delimiter + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + else + kind = :error + getch + end + + when :attribute_value_string + if scan(plain_string_content) + kind = :content + elsif scan(/['"]/) + tokens << [matched, :delimiter] + tokens << [:close, :string] + state = :attribute + next + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[\n>]/) + tokens << [:close, :string] + kind = :error + state = :initial + end + + else + raise_inspect 'Unknown state: %p' % [state], tokens + + end + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens, state + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + end + + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_xhtml.rb index 5955195..baef162 100644 --- a/lib/coderay/scanners/nitro_html.rb +++ b/lib/coderay/scanners/nitro_xhtml.rb @@ -1,125 +1,130 @@ -module CodeRay
-module Scanners
-
- load :html
- load :ruby
-
- # RHTML Scanner
- #
- # $Id$
- class NitroHTML < Scanner
-
- include Streamable
- register_for :nitro_html
-
- NITRO_RUBY_BLOCK = /
- <\?r
- (?>
- [^\?]*
- (?> \?(?!>) [^\?]* )*
- )
- (?: \?> )?
- |
- <ruby>
- (?>
- [^<]*
- (?> <(?!\/ruby>) [^<]* )*
- )
- (?: <\/ruby> )?
- |
- <%
- (?>
- [^%]*
- (?> %(?!>) [^%]* )*
- )
- (?: %> )?
- /mx
-
- NITRO_VALUE_BLOCK = /
- \#
- (?:
- \{
- [^{}]*
- (?>
- \{ [^}]* \}
- (?> [^{}]* )
- )*
- \}?
- | \| [^|]* \|?
- | \( [^)]* \)?
- | \[ [^\]]* \]?
- | \\ [^\\]* \\?
- )
- /x
-
- NITRO_ENTITY = /
- % (?: \#\d+ | \w+ ) ;
- /
-
- START_OF_RUBY = /
- (?=[<\#%])
- < (?: \?r | % | ruby> )
- | \# [{(|]
- | % (?: \#\d+ | \w+ ) ;
- /x
-
- CLOSING_PAREN = Hash.new do |h, p|
- h[p] = p
- end.update( {
- '(' => ')',
- '[' => ']',
- '{' => '}',
- } )
-
- private
-
- def setup
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
- end
-
- def scan_tokens tokens, options
-
- until eos?
-
- if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
- @html_scanner.tokenize match
-
- elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
- start_tag = match[0,2]
- delimiter = CLOSING_PAREN[start_tag[1,1]]
- end_tag = match[-1,1] == delimiter ? delimiter : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -1 - end_tag.size]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
- start_tag = '<?r'
- end_tag = match[-2,2] == '?>' ? '?>' : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -(end_tag.size)-1]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- elsif entity = scan(/#{NITRO_ENTITY}/o)
- tokens << [entity, :entity]
-
- else
- raise_inspect 'else-case reached!', tokens
- end
-
- end
-
- tokens
-
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + load :html + load :ruby + + # Nitro XHTML Scanner + # + # $Id$ + class NitroXHTML < Scanner + + include Streamable + register_for :nitro_xhtml + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + <ruby> + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '<?r' + end_tag = match[-2,2] == '?>' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 15a7566..8afb727 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,73 @@ -module CodeRay
-module Scanners
-
- load :html
- load :ruby
-
- # RHTML Scanner
- #
- # $Id$
- class RHTML < Scanner
-
- include Streamable
- register_for :rhtml
-
- ERB_RUBY_BLOCK = /
- <%(?!%)[=-]?
- (?>
- [^%]*
- (?> %(?!>) [^%]* )*
- )
- (?: %> )?
- /x
-
- START_OF_ERB = /
- <%(?!%)
- /x
-
- private
-
- def setup
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
- end
-
- def scan_tokens tokens, options
-
- until eos?
-
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
- @html_scanner.tokenize match
-
- elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
- start_tag = match[/\A<%[-=]?/]
- end_tag = match[/%?>?\z/]
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -1 - end_tag.size]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- else
- raise_inspect 'else-case reached!', tokens
- end
-
- end
-
- tokens
-
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^\-%]* # normal* + (?> # special + (?: %(?!>) | -(?!%>) ) + [^\-%]* # normal* + )* + ) + (?: -?%> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/-?%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 3ce5003..76c87ca 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -1,400 +1,404 @@ -module CodeRay
-module Scanners
-
- # This scanner is really complex, since Ruby _is_ a complex language!
- #
- # It tries to highlight 100% of all common code,
- # and 90% of strange codes.
- #
- # It is optimized for HTML highlighting, and is not very useful for
- # parsing or pretty printing.
- #
- # For now, I think it's better than the scanners in VIM or Syntax, or
- # any highlighter I was able to find, except Caleb's RubyLexer.
- #
- # I hope it's also better than the rdoc/irb lexer.
- class Ruby < Scanner
-
- include Streamable
-
- register_for :ruby
-
- helper :patterns
-
- DEFAULT_OPTIONS = {
- :parse_regexps => true,
- }
-
- private
- def scan_tokens tokens, options
- parse_regexp = false # options[:parse_regexps]
- first_bake = saved_tokens = nil
- last_token_dot = false
- fancy_allowed = regexp_allowed = true
- heredocs = nil
- last_state = nil
- state = :initial
- depth = nil
- states = []
-
- patterns = Patterns # avoid constant lookup
-
- until eos?
- type = :error
- match = nil
- kind = nil
-
- if state.instance_of? patterns::StringState
-# {{{
- match = scan_until(state.pattern) || scan_until(/\z/)
- tokens << [match, :content] unless match.empty?
- break if eos?
-
- if state.heredoc and self[1] # end of heredoc
- match = getch.to_s
- match << scan_until(/$/) unless eos?
- tokens << [match, :delimiter]
- tokens << [:close, state.type]
- state = state.next_state
- next
- end
-
- case match = getch
-
- when state.delim
- if state.paren
- state.paren_depth -= 1
- if state.paren_depth > 0
- tokens << [match, :nesting_delimiter]
- next
- end
- end
- tokens << [match, :delimiter]
- if state.type == :regexp and not eos?
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
- tokens << [modifiers, :modifier] unless modifiers.empty?
- if parse_regexp
- extended = modifiers.index ?x
- tokens = saved_tokens
- regexp = tokens
- for text, type in regexp
- if text.is_a? ::String
- case type
- when :content
- text.scan(/([^#]+)|(#.*)/) do |plain, comment|
- if plain
- tokens << [plain, :content]
- else
- tokens << [comment, :comment]
- end
- end
- when :character
- if text[/\\(?:[swdSWDAzZbB]|\d+)/]
- tokens << [text, :modifier]
- else
- tokens << [text, type]
- end
- else
- tokens << [text, type]
- end
- else
- tokens << [text, type]
- end
- end
- first_bake = saved_tokens = nil
- end
- end
- tokens << [:close, state.type]
- fancy_allowed = regexp_allowed = false
- state = state.next_state
-
- when '\\'
- if state.interpreted
- if esc = scan(/ #{patterns::ESCAPE} /ox)
- tokens << [match + esc, :char]
- else
- tokens << [match, :error]
- end
- else
- case m = getch
- when state.delim, '\\'
- tokens << [match + m, :char]
- when nil
- tokens << [match, :error]
- else
- tokens << [match + m, :content]
- end
- end
-
- when '#'
- case peek(1)[0]
- when ?{
- states.push [state, depth, heredocs]
- fancy_allowed = regexp_allowed = true
- state = :initial
- depth = 1
- tokens << [:open, :inline]
- tokens << [match + getch, :delimiter]
- when ?$, ?@
- tokens << [match, :escape]
- last_state = state # scan one token as normal code, then return here
- state = :initial
- else
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
- end
-
- when state.paren
- state.paren_depth += 1
- tokens << [match, :nesting_delimiter]
-
- when /#{patterns::REGEXP_SYMBOLS}/ox
- tokens << [match, :function]
-
- else
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
-
- end
- next
-# }}}
- else
-# {{{
- if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
- fancy_allowed = true
- case m = match[0]
- when ?\s, ?\t, ?\f
- match << scan(/\s*/) unless eos? or heredocs
- type = :space
- when ?\n, ?\\
- type = :space
- if m == ?\n
- regexp_allowed = true
- state = :initial if state == :undef_comma_expected
- end
- if heredocs
- unscan # heredoc scanning needs \n at start
- state = heredocs.shift
- tokens << [:open, state.type]
- heredocs = nil if heredocs.empty?
- next
- else
- match << scan(/\s*/) unless eos?
- end
- when ?#, ?=, ?_
- type = :comment
- regexp_allowed = true
- else
- raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
- end
- tokens << [match, type]
- next
-
- elsif state == :initial
-
- # IDENTS #
- if match = scan(/#{patterns::METHOD_NAME}/o)
- if last_token_dot
- type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
- else
- type = patterns::IDENT_KIND[match]
- if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
- type = :constant
- elsif type == :reserved
- state = patterns::DEF_NEW_STATE[match]
- end
- end
- ## experimental!
- fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
-
- # OPERATORS #
- elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
- (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
- regexp_allowed = fancy_allowed = :set
- end
- last_token_dot = :set if match == '.' or match == '::'
- type = :operator
- unless states.empty?
- case match
- when '{'
- depth += 1
- when '}'
- depth -= 1
- if depth == 0
- state, depth, heredocs = states.pop
- tokens << [match, :delimiter]
- type = :inline
- match = :close
- end
- end
- end
-
- elsif match = scan(/ ['"] /mx)
- tokens << [:open, :string]
- type = :delimiter
- state = patterns::StringState.new :string, match == '"', match # important for streaming
-
- elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
- type = :instance_variable
-
- elsif regexp_allowed and match = scan(/\//)
- tokens << [:open, :regexp]
- type = :delimiter
- interpreted = true
- state = patterns::StringState.new :regexp, interpreted, match
- if parse_regexp
- tokens = []
- saved_tokens = tokens
- end
-
- elsif match = scan(/#{patterns::NUMERIC}/o)
- type = if self[1] then :float else :integer end
-
- elsif match = scan(/#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
- match = delim.chr
- type = :delimiter
- state = patterns::StringState.new :symbol, delim == ?", match
- else
- type = :symbol
- end
-
- elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
- regexp_allowed = fancy_allowed = :set
- type = :operator
-
- elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
- indented = self[1] == '-'
- quote = self[3]
- delim = self[quote ? 4 : 2]
- type = patterns::QUOTE_TO_TYPE[quote]
- tokens << [:open, type]
- tokens << [match, :delimiter]
- match = :close
- heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
- heredocs ||= [] # create heredocs if empty
- heredocs << heredoc
-
- elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
- type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
- end
- tokens << [:open, type]
- state = patterns::StringState.new type, interpreted, self[2]
- type = :delimiter
-
- elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
- type = :integer
-
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
- regexp_allowed = fancy_allowed = :set
- type = :operator
-
- elsif match = scan(/`/)
- if last_token_dot
- type = :operator
- else
- tokens << [:open, :shell]
- type = :delimiter
- state = patterns::StringState.new :shell, true, match
- end
-
- elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
- type = :global_variable
-
- elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
- type = :class_variable
-
- else
- match = getch
-
- end
-
- elsif state == :def_expected
- state = :initial
- if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- type = :method
- else
- next
- end
-
- elsif state == :undef_expected
- state = :undef_comma_expected
- if match = scan(/#{patterns::METHOD_NAME_EX}/o)
- type = :method
- elsif match = scan(/#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
- match = delim.chr
- type = :delimiter
- state = patterns::StringState.new :symbol, delim == ?", match
- state.next_state = :undef_comma_expected
- else
- type = :symbol
- end
- else
- state = :initial
- next
- end
-
- elsif state == :undef_comma_expected
- if match = scan(/,/)
- type = :operator
- state = :undef_expected
- else
- state = :initial
- next
- end
-
- elsif state == :module_expected
- if match = scan(/<</)
- type = :operator
- else
- state = :initial
- if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
- type = :class
- else
- next
- end
- end
-
- end
-# }}}
-
- regexp_allowed = regexp_allowed == :set
- fancy_allowed = fancy_allowed == :set
- last_token_dot = last_token_dot == :set
-
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, type]
-
- if last_state
- state = last_state
- last_state = nil
- end
- end
- end
-
- states << state if state.is_a? patterns::StringState
- until states.empty?
- tokens << [:close, states.pop.type]
- end
-
- tokens
- end
- end
-
-end
-end
-
-# vim:fdm=marker
+module CodeRay +module Scanners + + # This scanner is really complex, since Ruby _is_ a complex language! + # + # It tries to highlight 100% of all common code, + # and 90% of strange codes. + # + # It is optimized for HTML highlighting, and is not very useful for + # parsing or pretty printing. + # + # For now, I think it's better than the scanners in VIM or Syntax, or + # any highlighter I was able to find, except Caleb's RubyLexer. + # + # I hope it's also better than the rdoc/irb lexer. + class Ruby < Scanner + + include Streamable + + register_for :ruby + + helper :patterns + + DEFAULT_OPTIONS = { + :parse_regexps => true, + } + + private + def scan_tokens tokens, options + parse_regexp = false # options[:parse_regexps] + first_bake = saved_tokens = nil + last_token_dot = false + fancy_allowed = regexp_allowed = true + heredocs = nil + last_state = nil + state = :initial + depth = nil + inline_block_stack = [] + + patterns = Patterns # avoid constant lookup + + until eos? + match = nil + kind = nil + + if state.instance_of? patterns::StringState +# {{{ + match = scan_until(state.pattern) || scan_until(/\z/) + tokens << [match, :content] unless match.empty? + break if eos? + + if state.heredoc and self[1] # end of heredoc + match = getch.to_s + match << scan_until(/$/) unless eos? + tokens << [match, :delimiter] + tokens << [:close, state.type] + state = state.next_state + next + end + + case match = getch + + when state.delim + if state.paren + state.paren_depth -= 1 + if state.paren_depth > 0 + tokens << [match, :nesting_delimiter] + next + end + end + tokens << [match, :delimiter] + if state.type == :regexp and not eos? + modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) + tokens << [modifiers, :modifier] unless modifiers.empty? + if parse_regexp + extended = modifiers.index ?x + tokens = saved_tokens + regexp = tokens + for text, kind in regexp + if text.is_a? ::String + case kind + when :content + text.scan(/([^#]+)|(#.*)/) do |plain, comment| + if plain + tokens << [plain, :content] + else + tokens << [comment, :comment] + end + end + when :character + if text[/\\(?:[swdSWDAzZbB]|\d+)/] + tokens << [text, :modifier] + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + end + first_bake = saved_tokens = nil + end + end + tokens << [:close, state.type] + fancy_allowed = regexp_allowed = false + state = state.next_state + + when '\\' + if state.interpreted + if esc = scan(/ #{patterns::ESCAPE} /ox) + tokens << [match + esc, :char] + else + tokens << [match, :error] + end + else + case m = getch + when state.delim, '\\' + tokens << [match + m, :char] + when nil + tokens << [match, :error] + else + tokens << [match + m, :content] + end + end + + when '#' + case peek(1)[0] + when ?{ + inline_block_stack << [state, depth, heredocs] + fancy_allowed = regexp_allowed = true + state = :initial + depth = 1 + tokens << [:open, :inline] + tokens << [match + getch, :delimiter] + when ?$, ?@ + tokens << [match, :escape] + last_state = state # scan one token as normal code, then return here + state = :initial + else + raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens + end + + when state.paren + state.paren_depth += 1 + tokens << [match, :nesting_delimiter] + + when /#{patterns::REGEXP_SYMBOLS}/ox + tokens << [match, :function] + + else + raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens + + end + next +# }}} + else +# {{{ + if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or + ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) + fancy_allowed = true + case m = match[0] + when ?\s, ?\t, ?\f + match << scan(/\s*/) unless eos? or heredocs + kind = :space + when ?\n, ?\\ + kind = :space + if m == ?\n + regexp_allowed = true + state = :initial if state == :undef_comma_expected + end + if heredocs + unscan # heredoc scanning needs \n at start + state = heredocs.shift + tokens << [:open, state.type] + heredocs = nil if heredocs.empty? + next + else + match << scan(/\s*/) unless eos? + end + when ?#, ?=, ?_ + kind = :comment + regexp_allowed = true + else + raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + end + tokens << [match, kind] + next + + elsif state == :initial + + # IDENTS # + if match = scan(/#{patterns::METHOD_NAME}/o) + if last_token_dot + kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end + else + kind = patterns::IDENT_KIND[match] + if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) + kind = :constant + elsif kind == :reserved + state = patterns::DEF_NEW_STATE[match] + end + end + ## experimental! + fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) + + # OPERATORS # + elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or + (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ + regexp_allowed = fancy_allowed = :set + end + last_token_dot = :set if match == '.' or match == '::' + kind = :operator + unless inline_block_stack.empty? + case match + when '{' + depth += 1 + when '}' + depth -= 1 + if depth == 0 # closing brace of inline block reached + state, depth, heredocs = inline_block_stack.pop + tokens << [match, :delimiter] + kind = :inline + match = :close + end + end + end + + elsif match = scan(/ ['"] /mx) + tokens << [:open, :string] + kind = :delimiter + state = patterns::StringState.new :string, match == '"', match # important for streaming + + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) + kind = :instance_variable + + elsif regexp_allowed and match = scan(/\//) + tokens << [:open, :regexp] + kind = :delimiter + interpreted = true + state = patterns::StringState.new :regexp, interpreted, match + if parse_regexp + tokens = [] + saved_tokens = tokens + end + + elsif match = scan(/#{patterns::NUMERIC}/o) + kind = if self[1] then :float else :integer end + + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + else + kind = :symbol + end + + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) + indented = self[1] == '-' + quote = self[3] + delim = self[quote ? 4 : 2] + kind = patterns::QUOTE_TO_TYPE[quote] + tokens << [:open, kind] + tokens << [match, :delimiter] + match = :close + heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart ) + heredocs ||= [] # create heredocs if empty + heredocs << heredoc + + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) + kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do + raise_inspect 'Unknown fancy string: %%%p' % k, tokens + end + tokens << [:open, kind] + state = patterns::StringState.new kind, interpreted, self[2] + kind = :delimiter + + elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) + kind = :integer + + elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif match = scan(/`/) + if last_token_dot + kind = :operator + else + tokens << [:open, :shell] + kind = :delimiter + state = patterns::StringState.new :shell, true, match + end + + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) + kind = :global_variable + + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) + kind = :class_variable + + else + kind = :error + match = getch + + end + + elsif state == :def_expected + state = :initial + if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) + kind = :method + else + next + end + + elsif state == :undef_expected + state = :undef_comma_expected + if match = scan(/#{patterns::METHOD_NAME_EX}/o) + kind = :method + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + state.next_state = :undef_comma_expected + else + kind = :symbol + end + else + state = :initial + next + end + + elsif state == :undef_comma_expected + if match = scan(/,/) + kind = :operator + state = :undef_expected + else + state = :initial + next + end + + elsif state == :module_expected + if match = scan(/<</) + kind = :operator + else + state = :initial + if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox) + kind = :class + else + next + end + end + + end +# }}} + + regexp_allowed = regexp_allowed == :set + fancy_allowed = fancy_allowed == :set + last_token_dot = last_token_dot == :set + + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens, state + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + if last_state + state = last_state + last_state = nil + end + end + end + + inline_block_stack << [state] if state.is_a? patterns::StringState + until inline_block_stack.empty? + this_block = inline_block_stack.pop + tokens << [:close, :inline] if this_block.size > 1 + state = this_block.first + tokens << [:close, state.type] + end + + tokens + end + + end + +end +end + +# vim:fdm=marker |