From 26a8e5a0388199ac686db28d631b05a5b5aa02e1 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 11 Jul 2006 05:37:50 +0000 Subject: Changed error handling of all scanners: :error tokens are OK now, even in debug mode, but token kind is nil unless assigned. Small fixes for C and Ruby scanners. Renamed local variable type to kind in Ruby scanner. Improved RHTML scanner to recognize -%> as delimiter. HTML encoder: improved handling of malformed token strings. Fixed PluginHost#inspect including docu. Scanner#raise_inspect also shows state if given. --- lib/coderay/encoders/html.rb | 494 +++++++++++----------- lib/coderay/helpers/plugin.rb | 18 +- lib/coderay/scanner.rb | 476 ++++++++++----------- lib/coderay/scanners/_map.rb | 29 +- lib/coderay/scanners/c.rb | 318 +++++++------- lib/coderay/scanners/delphi.rb | 260 ++++++------ lib/coderay/scanners/html.rb | 341 +++++++-------- lib/coderay/scanners/nitro_html.rb | 125 ------ lib/coderay/scanners/nitro_xhtml.rb | 130 ++++++ lib/coderay/scanners/rhtml.rb | 138 ++++--- lib/coderay/scanners/ruby.rb | 804 ++++++++++++++++++------------------ 11 files changed, 1586 insertions(+), 1547 deletions(-) delete mode 100644 lib/coderay/scanners/nitro_html.rb create mode 100644 lib/coderay/scanners/nitro_xhtml.rb (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 60c56c1..bd7583a 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -1,245 +1,249 @@ -module CodeRay -module Encoders - - # = HTML Encoder - # - # This is CodeRay's most important highlighter: - # It provides save, fast XHTML generation and CSS support. - # - # == Usage - # - # require 'coderay' - # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page - # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> Some /code/ - # puts CodeRay.scan('Some /code/', :ruby).span #-> the same - # - # puts CodeRay.scan('Some code', :ruby).html( - # :wrap => nil, - # :line_numbers => :inline, - # :css => :style - # ) - # #-> 1 Some code - # - # == Options - # - # === :tab_width - # Convert \t characters to +n+ spaces (a number.) - # Default: 8 - # - # === :css - # How to include the styles; can be :class or :style. - # - # Default: :class - # - # === :wrap - # Wrap in :page, :div, :span or nil. - # - # You can also use Encoders::Div and Encoders::Span. - # - # Default: nil - # - # === :line_numbers - # Include line numbers in :table, :inline, :list or nil (no line numbers) - # - # Default: nil - # - # === :line_number_start - # Where to start with line number counting. - # - # Default: 1 - # - # === :bold_every - # Make every +n+-th number appear bold. - # - # Default: 10 - # - # === :hint - # Include some information into the output using the title attribute. - # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect). - # - # Default: false - class HTML < Encoder - - include Streamable - register_for :html - - FILE_EXTENSION = 'html' - - DEFAULT_OPTIONS = { - :tab_width => 8, - - :level => :xhtml, - :css => :class, - - :style => :cycnus, - - :wrap => nil, - - :line_numbers => nil, - :line_number_start => 1, - :bold_every => 10, - - :hint => false, - } - - helper :classes, :output, :css - - attr_reader :css - - protected - - HTML_ESCAPE = { #:nodoc: - '&' => '&', - '"' => '"', - '>' => '>', - '<' => '<', - } - - # This was to prevent illegal HTML. - # Strange chars should still be avoided in codes. - evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s] - evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' } - #ansi_chars = Array(0x7f..0xff) - #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i } - # \x9 (\t) and \xA (\n) not included - #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/ - HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/ - - TOKEN_KIND_TO_INFO = Hash.new { |h, kind| - h[kind] = - case kind - when :pre_constant - 'Predefined constant' - else - kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } - end - } - - # Generate a hint about the given +classes+ in a +hint+ style. - # - # +hint+ may be :info, :info_long or :debug. - def self.token_path_to_hint hint, classes - return '' unless hint - title = - case hint - when :info - TOKEN_KIND_TO_INFO[classes.first] - when :info_long - classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') - when :debug - classes.inspect - end - " title=\"#{title}\"" - end - - def setup options - super - - @HTML_ESCAPE = HTML_ESCAPE.dup - @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] - - @opened = [nil] - @css = CSS.new options[:style] - - hint = options[:hint] - if hint and not [:debug, :info, :info_long].include? hint - raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint - end - - case options[:css] - - when :class - @css_style = Hash.new do |h, k| - if k.is_a? Array - type = k.first - else - type = k - end - c = ClassOfKind[type] - if c == :NO_HIGHLIGHT and not hint - h[k] = false - else - title = HTML.token_path_to_hint hint, (k[1..-1] << k.first) - h[k] = '' % [title, c] - end - end - - when :style - @css_style = Hash.new do |h, k| - if k.is_a? Array - styles = k.dup - else - styles = [k] - end - type = styles.first - classes = styles.map { |c| ClassOfKind[c] } - if classes.first == :NO_HIGHLIGHT and not hint - h[k] = false - else - styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first - title = HTML.token_path_to_hint hint, styles - classes.delete 'il' - style = @css[*classes] - h[k] = - if style - '' % [title, style] - else - false - end - end - end - - else - raise ArgumentError, "Unknown value %p for :css." % options[:css] - - end - end - - def finish options - not_needed = @opened.shift - @out << '' * @opened.size - warn '%d tokens still open' % @opened.size unless @opened.empty? - - @out.extend Output - @out.css = @css - @out.numerize! options[:line_numbers], options - @out.wrap! options[:wrap] - - super - end - - def token text, type - if text.is_a? ::String - if text =~ /#{HTML_ESCAPE_PATTERN}/o - text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } - end - @opened[0] = type - if style = @css_style[@opened] - @out << style << text << '' - else - @out << text - end - else - case text - when :open - @opened[0] = type - @out << (@css_style[@opened] || '') - @opened << type - when :close - unless @opened.empty? - raise 'Malformed token stream: Trying to close a token that was never opened.' unless @opened.size > 1 - @out << '' - @opened.pop - end - when nil - raise 'Token with nil as text was given: %p' % [[text, type]] - else - raise 'unknown token kind: %p' % text - end - end - end - - end - -end -end +module CodeRay +module Encoders + + # = HTML Encoder + # + # This is CodeRay's most important highlighter: + # It provides save, fast XHTML generation and CSS support. + # + # == Usage + # + # require 'coderay' + # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page + # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> Some /code/ + # puts CodeRay.scan('Some /code/', :ruby).span #-> the same + # + # puts CodeRay.scan('Some code', :ruby).html( + # :wrap => nil, + # :line_numbers => :inline, + # :css => :style + # ) + # #-> 1 Some code + # + # == Options + # + # === :tab_width + # Convert \t characters to +n+ spaces (a number.) + # Default: 8 + # + # === :css + # How to include the styles; can be :class or :style. + # + # Default: :class + # + # === :wrap + # Wrap in :page, :div, :span or nil. + # + # You can also use Encoders::Div and Encoders::Span. + # + # Default: nil + # + # === :line_numbers + # Include line numbers in :table, :inline, :list or nil (no line numbers) + # + # Default: nil + # + # === :line_number_start + # Where to start with line number counting. + # + # Default: 1 + # + # === :bold_every + # Make every +n+-th number appear bold. + # + # Default: 10 + # + # === :hint + # Include some information into the output using the title attribute. + # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect). + # + # Default: false + class HTML < Encoder + + include Streamable + register_for :html + + FILE_EXTENSION = 'html' + + DEFAULT_OPTIONS = { + :tab_width => 8, + + :level => :xhtml, + :css => :class, + + :style => :cycnus, + + :wrap => nil, + + :line_numbers => nil, + :line_number_start => 1, + :bold_every => 10, + + :hint => false, + } + + helper :classes, :output, :css + + attr_reader :css + + protected + + HTML_ESCAPE = { #:nodoc: + '&' => '&', + '"' => '"', + '>' => '>', + '<' => '<', + } + + # This was to prevent illegal HTML. + # Strange chars should still be avoided in codes. + evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s] + evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' } + #ansi_chars = Array(0x7f..0xff) + #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i } + # \x9 (\t) and \xA (\n) not included + #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/ + HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/ + + TOKEN_KIND_TO_INFO = Hash.new { |h, kind| + h[kind] = + case kind + when :pre_constant + 'Predefined constant' + else + kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } + end + } + + # Generate a hint about the given +classes+ in a +hint+ style. + # + # +hint+ may be :info, :info_long or :debug. + def self.token_path_to_hint hint, classes + return '' unless hint + title = + case hint + when :info + TOKEN_KIND_TO_INFO[classes.first] + when :info_long + classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') + when :debug + classes.inspect + end + " title=\"#{title}\"" + end + + def setup options + super + + @HTML_ESCAPE = HTML_ESCAPE.dup + @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] + + @opened = [nil] + @css = CSS.new options[:style] + + hint = options[:hint] + if hint and not [:debug, :info, :info_long].include? hint + raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint + end + + case options[:css] + + when :class + @css_style = Hash.new do |h, k| + if k.is_a? Array + type = k.first + else + type = k + end + c = ClassOfKind[type] + if c == :NO_HIGHLIGHT and not hint + h[k] = false + else + title = HTML.token_path_to_hint hint, (k[1..-1] << k.first) + h[k] = '' % [title, c] + end + end + + when :style + @css_style = Hash.new do |h, k| + if k.is_a? Array + styles = k.dup + else + styles = [k] + end + type = styles.first + classes = styles.map { |c| ClassOfKind[c] } + if classes.first == :NO_HIGHLIGHT and not hint + h[k] = false + else + styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first + title = HTML.token_path_to_hint hint, styles + classes.delete 'il' + style = @css[*classes] + h[k] = + if style + '' % [title, style] + else + false + end + end + end + + else + raise ArgumentError, "Unknown value %p for :css." % options[:css] + + end + end + + def finish options + not_needed = @opened.shift + @out << '' * @opened.size + warn '%d tokens still open: %p' % [@opened.size, @opened] unless @opened.empty? + + @out.extend Output + @out.css = @css + @out.numerize! options[:line_numbers], options + @out.wrap! options[:wrap] + + super + end + + def token text, type + if text.is_a? ::String + if text =~ /#{HTML_ESCAPE_PATTERN}/o + text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } + end + @opened[0] = type + if style = @css_style[@opened] + @out << style << text << '' + else + @out << text + end + else + case text + when :open + @opened[0] = type + @out << (@css_style[@opened] || '') + @opened << type + when :close + if @opened.empty? + # nothing to close + else + if @opened.size == 1 or @opened.last != type + raise 'Malformed token stream: Trying to close a token (%p) that is not open. Open are: %p.' % [type, @opened[1..-1]] if $DEBUG + end + @out << '' + @opened.pop + end + when nil + raise 'Token with nil as text was given: %p' % [[text, type]] + else + raise 'unknown token kind: %p' % text + end + end + end + + end + +end +end diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index 7e90279..742717d 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -168,6 +168,15 @@ module PluginHost end end + # Makes a map of all loaded plugins. + def inspect + map = plugin_hash.dup + map.each do |id, plugin| + map[id] = plugin.to_s[/(?>[\w_]+)$/] + end + "#{name}[#{host_id}]#{map.inspect}" + end + protected # Created a new plugin list and stores it to @plugin_hash. def create_plugin_hash @@ -194,15 +203,6 @@ protected end end - # Makes a map of all loaded scanners. - def inspect - map = plugin_hash.dup - map.each do |id, plugin| - map[id] = plugin.name[/(?>[\w_]+)$/] - end - map.inspect - end - # Loads the map file (see map). # # This is done automatically when plugin_path is called. diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb index 6d20211..83f73f2 100644 --- a/lib/coderay/scanner.rb +++ b/lib/coderay/scanner.rb @@ -1,238 +1,238 @@ -module CodeRay - - require 'coderay/helpers/plugin' - - # = Scanners - # - # $Id$ - # - # This module holds the Scanner class and its subclasses. - # For example, the Ruby scanner is named CodeRay::Scanners::Ruby - # can be found in coderay/scanners/ruby. - # - # Scanner also provides methods and constants for the register - # mechanism and the [] method that returns the Scanner class - # belonging to the given lang. - # - # See PluginHost. - module Scanners - extend PluginHost - plugin_path File.dirname(__FILE__), 'scanners' - - require 'strscan' - - # = Scanner - # - # The base class for all Scanners. - # - # It is a subclass of Ruby's great +StringScanner+, which - # makes it easy to access the scanning methods inside. - # - # It is also +Enumerable+, so you can use it like an Array of - # Tokens: - # - # require 'coderay' - # - # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" - # - # for text, kind in c_scanner - # puts text if kind == :operator - # end - # - # # prints: (*==)++; - # - # OK, this is a very simple example :) - # You can also use +map+, +any?+, +find+ and even +sort_by+, - # if you want. - class Scanner < StringScanner - extend Plugin - plugin_host Scanners - - # Raised if a Scanner fails while scanning - ScanError = Class.new(Exception) - - require 'coderay/helpers/word_list' - - # The default options for all scanner classes. - # - # Define @default_options for subclasses. - DEFAULT_OPTIONS = { :stream => false } - - class << self - - # Returns if the Scanner can be used in streaming mode. - def streamable? - is_a? Streamable - end - - def normify code - code = code.to_s.to_unix - end - - end - -=begin -## Excluded for speed reasons; protected seems to make methods slow. - - # Save the StringScanner methods from being called. - # This would not be useful for highlighting. - strscan_public_methods = - StringScanner.instance_methods - - StringScanner.ancestors[1].instance_methods - protected(*strscan_public_methods) -=end - - # Create a new Scanner. - # - # * +code+ is the input String and is handled by the superclass - # StringScanner. - # * +options+ is a Hash with Symbols as keys. - # It is merged with the default options of the class (you can - # overwrite default options here.) - # * +block+ is the callback for streamed highlighting. - # - # If you set :stream to +true+ in the options, the Scanner uses a - # TokenStream with the +block+ as callback to handle the tokens. - # - # Else, a Tokens object is used. - def initialize code='', options = {}, &block - @options = self.class::DEFAULT_OPTIONS.merge options - raise "I am only the basic Scanner class. I can't scan "\ - "anything. :( Use my subclasses." if self.class == Scanner - - super Scanner.normify(code) - - @tokens = options[:tokens] - if @options[:stream] - warn "warning in CodeRay::Scanner.new: :stream is set, "\ - "but no block was given" unless block_given? - raise NotStreamableError, self unless kind_of? Streamable - @tokens ||= TokenStream.new(&block) - else - warn "warning in CodeRay::Scanner.new: Block given, "\ - "but :stream is #{@options[:stream]}" if block_given? - @tokens ||= Tokens.new - end - - setup - end - - # More mnemonic accessor name for the input string. - alias code string - - def reset - super - reset_instance - end - - def string= code - code = Scanner.normify(code) - super code - reset_instance - end - - # Scans the code and returns all tokens in a Tokens object. - def tokenize new_string=nil, options = {} - options = @options.merge(options) - self.string = new_string if new_string - @cached_tokens = - if @options[:stream] # :stream must have been set already - reset unless new_string - scan_tokens @tokens, options - @tokens - else - scan_tokens @tokens, options - end - end - - def tokens - @cached_tokens ||= tokenize - end - - # Traverses the tokens. - def each &block - raise ArgumentError, - 'Cannot traverse TokenStream.' if @options[:stream] - tokens.each(&block) - end - include Enumerable - - # The current line position of the scanner. - # - # Beware, this is implemented inefficiently. It should be used - # for debugging only. - def line - string[0..pos].count("\n") + 1 - end - - protected - - # Can be implemented by subclasses to do some initialization - # that has to be done once per instance. - # - # Use reset for initialization that has to be done once per - # scan. - def setup - end - - # This is the central method, and commonly the only one a - # subclass implements. - # - # Subclasses must implement this method; it must return +tokens+ - # and must only use Tokens#<< for storing scanned tokens! - def scan_tokens tokens, options - raise NotImplementedError, - "#{self.class}#scan_tokens not implemented." - end - - def reset_instance - @tokens.clear unless @options[:keep_tokens] - @cached_tokens = nil - end - - # Scanner error with additional status information - def raise_inspect msg, tokens, ambit = 30 - raise ScanError, <<-EOE % [ - - -***ERROR in %s: %s - -tokens: -%s - -current line: %d pos = %d -matched: %p -bol? = %p, eos? = %p - -surrounding code: -%p ~~ %p - - -***ERROR*** - - EOE - File.basename(caller[0]), - msg, - tokens.last(10).map { |t| t.inspect }.join("\n"), - line, pos, - matched, bol?, eos?, - string[pos-ambit,ambit], - string[pos,ambit], - ] - end - - end - - end -end - -class String - # I love this hack. It seems to silence all dos/unix/mac newline problems. - def to_unix - if index ?\r - gsub(/\r\n?/, "\n") - else - self - end - end -end +module CodeRay + + require 'coderay/helpers/plugin' + + # = Scanners + # + # $Id$ + # + # This module holds the Scanner class and its subclasses. + # For example, the Ruby scanner is named CodeRay::Scanners::Ruby + # can be found in coderay/scanners/ruby. + # + # Scanner also provides methods and constants for the register + # mechanism and the [] method that returns the Scanner class + # belonging to the given lang. + # + # See PluginHost. + module Scanners + extend PluginHost + plugin_path File.dirname(__FILE__), 'scanners' + + require 'strscan' + + # = Scanner + # + # The base class for all Scanners. + # + # It is a subclass of Ruby's great +StringScanner+, which + # makes it easy to access the scanning methods inside. + # + # It is also +Enumerable+, so you can use it like an Array of + # Tokens: + # + # require 'coderay' + # + # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" + # + # for text, kind in c_scanner + # puts text if kind == :operator + # end + # + # # prints: (*==)++; + # + # OK, this is a very simple example :) + # You can also use +map+, +any?+, +find+ and even +sort_by+, + # if you want. + class Scanner < StringScanner + extend Plugin + plugin_host Scanners + + # Raised if a Scanner fails while scanning + ScanError = Class.new(Exception) + + require 'coderay/helpers/word_list' + + # The default options for all scanner classes. + # + # Define @default_options for subclasses. + DEFAULT_OPTIONS = { :stream => false } + + class << self + + # Returns if the Scanner can be used in streaming mode. + def streamable? + is_a? Streamable + end + + def normify code + code = code.to_s.to_unix + end + + end + +=begin +## Excluded for speed reasons; protected seems to make methods slow. + + # Save the StringScanner methods from being called. + # This would not be useful for highlighting. + strscan_public_methods = + StringScanner.instance_methods - + StringScanner.ancestors[1].instance_methods + protected(*strscan_public_methods) +=end + + # Create a new Scanner. + # + # * +code+ is the input String and is handled by the superclass + # StringScanner. + # * +options+ is a Hash with Symbols as keys. + # It is merged with the default options of the class (you can + # overwrite default options here.) + # * +block+ is the callback for streamed highlighting. + # + # If you set :stream to +true+ in the options, the Scanner uses a + # TokenStream with the +block+ as callback to handle the tokens. + # + # Else, a Tokens object is used. + def initialize code='', options = {}, &block + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Scanner class. I can't scan "\ + "anything. :( Use my subclasses." if self.class == Scanner + + super Scanner.normify(code) + + @tokens = options[:tokens] + if @options[:stream] + warn "warning in CodeRay::Scanner.new: :stream is set, "\ + "but no block was given" unless block_given? + raise NotStreamableError, self unless kind_of? Streamable + @tokens ||= TokenStream.new(&block) + else + warn "warning in CodeRay::Scanner.new: Block given, "\ + "but :stream is #{@options[:stream]}" if block_given? + @tokens ||= Tokens.new + end + + setup + end + + # More mnemonic accessor name for the input string. + alias code string + + def reset + super + reset_instance + end + + def string= code + code = Scanner.normify(code) + super code + reset_instance + end + + # Scans the code and returns all tokens in a Tokens object. + def tokenize new_string=nil, options = {} + options = @options.merge(options) + self.string = new_string if new_string + @cached_tokens = + if @options[:stream] # :stream must have been set already + reset unless new_string + scan_tokens @tokens, options + @tokens + else + scan_tokens @tokens, options + end + end + + def tokens + @cached_tokens ||= tokenize + end + + # Traverses the tokens. + def each &block + raise ArgumentError, + 'Cannot traverse TokenStream.' if @options[:stream] + tokens.each(&block) + end + include Enumerable + + # The current line position of the scanner. + # + # Beware, this is implemented inefficiently. It should be used + # for debugging only. + def line + string[0..pos].count("\n") + 1 + end + + protected + + # Can be implemented by subclasses to do some initialization + # that has to be done once per instance. + # + # Use reset for initialization that has to be done once per + # scan. + def setup + end + + # This is the central method, and commonly the only one a + # subclass implements. + # + # Subclasses must implement this method; it must return +tokens+ + # and must only use Tokens#<< for storing scanned tokens! + def scan_tokens tokens, options + raise NotImplementedError, + "#{self.class}#scan_tokens not implemented." + end + + def reset_instance + @tokens.clear unless @options[:keep_tokens] + @cached_tokens = nil + end + + # Scanner error with additional status information + def raise_inspect msg, tokens, state = nil, ambit = 30 + raise ScanError, <<-EOE % [ + + +***ERROR in %s: %s + +tokens: +%s + +current line: %d pos = %d +matched: %p state: %p +bol? = %p, eos? = %p + +surrounding code: +%p ~~ %p + + +***ERROR*** + + EOE + File.basename(caller[0]), + msg, + tokens.last(10).map { |t| t.inspect }.join("\n"), + line, pos, + matched, state, bol?, eos?, + string[pos-ambit,ambit], + string[pos,ambit], + ] + end + + end + + end +end + +class String + # I love this hack. It seems to silence all dos/unix/mac newline problems. + def to_unix + if index ?\r + gsub(/\r\n?/, "\n") + else + self + end + end +end diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 6268a6c..1c5fc89 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,15 @@ -module CodeRay -module Scanners - - map :cpp => :c, - :plain => :plaintext, - :pascal => :delphi, - :irb => :ruby, - :xml => :html, - :xhtml => :nitro_html - - default :plain - -end -end +module CodeRay +module Scanners + + map :cpp => :c, + :plain => :plaintext, + :pascal => :delphi, + :irb => :ruby, + :xml => :html, + :xhtml => :nitro_xhtml, + :nitro => :nitro_xhtml + + default :plain + +end +end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 66b8de1..be113d0 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,163 @@ -module CodeRay -module Scanners - - class C < Scanner - - register_for :c - - RESERVED_WORDS = [ - 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', - 'for', 'goto', 'if', 'return', 'switch', 'while', - 'struct', 'union', 'enum', 'typedef', - 'static', 'register', 'auto', 'extern', - 'sizeof', - 'volatile', 'const', # C89 - 'inline', 'restrict', # C99 - ] - - PREDEFINED_TYPES = [ - 'int', 'long', 'short', 'char', 'void', - 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 - ] - - PREDEFINED_CONSTANTS = [ - 'EOF', 'NULL', - 'true', 'false', # C99 - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_TYPES, :pre_type). - add(PREDEFINED_CONSTANTS, :pre_constant) - - ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - case state - - when :initial - - if scan(/ \s+ | \\\n /x) - kind = :space - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment - - elsif match = scan(/ \# \s* if \s* 0 /x) - match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment - - elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - match << scan(/:/) - kind = :label - end - - elsif match = scan(/L?"/) - tokens << [:open, :string] - if match[0] == ?L - tokens << ['L', :modifier] - match = '"' - end - state = :string - kind = :delimiter - - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs - state = :include_expected if self[1] == 'include' - - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) - kind = :char - - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) - kind = :oct - - elsif scan(/(?:\d+)(?![.eEfF])/) - kind = :integer - - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - - else - getch - end - - when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - kind = :error - state = :initial - else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens - end - - when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include - state = :initial - - elsif match = scan(/\s+/) - kind = :space - state = :initial if match.index ?\n - - else - getch - - end - - else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index d9d9e1d..c141874 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,131 @@ -module CodeRay -module Scanners - - class Delphi < Scanner - - register_for :delphi - - RESERVED_WORDS = [ - 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', - 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', - 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', - 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', - 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', - 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', - 'procedure', 'program', 'property', 'raise', 'record', 'repeat', - 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', - 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', - 'xor', 'on' - ] - - DIRECTIVES = [ - 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', - 'contains', 'deprecated', 'dispid', 'dynamic', 'export', - 'external', 'far', 'forward', 'implements', 'local', - 'near', 'nodefault', 'on', 'overload', 'override', - 'package', 'pascal', 'platform', 'private', 'protected', 'public', - 'published', 'read', 'readonly', 'register', 'reintroduce', - 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', - 'virtual', 'write', 'writeonly' - ] - - IDENT_KIND = CaseIgnoringWordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(DIRECTIVES, :directive) - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - if state == :initial - - if scan(/ \s+ /x) - kind = :space - - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - kind = :preprocessor - - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - kind = :comment - - elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] - next - - elsif match = scan(/ ' /x) - tokens << [:open, :string] - state = :string - kind = :delimiter - - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char - - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex - - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer - - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float - - else - getch - end - - elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/\n/) - state = :initial - else - raise "else case \' reached; %p not handled." % peek(1), tokens - end - - else - raise 'else-case reached', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + class Delphi < Scanner + + register_for :delphi + + RESERVED_WORDS = [ + 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', + 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', + 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', + 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', + 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', + 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', + 'procedure', 'program', 'property', 'raise', 'record', 'repeat', + 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', + 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', + 'xor', 'on' + ] + + DIRECTIVES = [ + 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', + 'contains', 'deprecated', 'dispid', 'dynamic', 'export', + 'external', 'far', 'forward', 'implements', 'local', + 'near', 'nodefault', 'on', 'overload', 'override', + 'package', 'pascal', 'platform', 'private', 'protected', 'public', + 'published', 'read', 'readonly', 'register', 'reintroduce', + 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', + 'virtual', 'write', 'writeonly' + ] + + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(DIRECTIVES, :directive) + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + if state == :initial + + if scan(/ \s+ /x) + kind = :space + + elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + kind = :preprocessor + + elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + kind = :comment + + elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + + elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) + tokens << [:open, :char] + tokens << ["'", :delimiter] + tokens << [self[1], :content] + tokens << ["'", :delimiter] + tokens << [:close, :char] + next + + elsif match = scan(/ ' /x) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + kind = :char + + elsif scan(/ \$ [0-9A-Fa-f]+ /x) + kind = :hex + + elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + kind = :integer + + elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + kind = :float + + else + kind = :error + getch + + end + + elsif state == :string + if scan(/[^\n']+/) + kind = :content + elsif scan(/''/) + kind = :char + elsif scan(/'/) + tokens << ["'", :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/\n/) + state = :initial + else + raise "else case \' reached; %p not handled." % peek(1), tokens + end + + else + raise 'else-case reached', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 7cdc07e..181e5d3 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,174 @@ -module CodeRay -module Scanners - - # HTML Scanner - # - # $Id$ - class HTML < Scanner - - include Streamable - register_for :html - - ATTR_NAME = /[\w.:-]+/ - ATTR_VALUE_UNQUOTED = ATTR_NAME - TAG_END = /\/?>/ - HEX = /[0-9a-fA-F]/ - ENTITY = / - & - (?: - \w+ - | - \# - (?: - \d+ - | - x#{HEX}+ - ) - ) - ; - /ox - - PLAIN_STRING_CONTENT = { - "'" => /[^&'>\n]+/, - '"' => /[^&">\n]+/, - } - - private - def setup - @state = :initial - @plain_string_content = nil - end - - def scan_tokens tokens, options - - state = @state - plain_string_content = @plain_string_content - - until eos? - - kind = :error - match = nil - - if scan(/\s+/m) - kind = :space - - else - - case state - - when :initial - if scan(//m) - kind = :comment - elsif scan(//m) - kind = :preprocessor - elsif scan(/<\?xml.*?\?>/m) - kind = :preprocessor - elsif scan(/<\?.*?\?>|<%.*?%>/m) - kind = :comment - elsif scan(/<\/[-\w_.:]*>/m) - kind = :tag - elsif match = scan(/<[-\w_.:]*>?/m) - kind = :tag - state = :attribute unless match[-1] == ?> - elsif scan(/[^<>&]+/) - kind = :plain - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[>&]/) - kind = :error - else - raise_inspect '[BUG] else-case reached with state %p' % [state], tokens - end - - when :attribute - if scan(/#{TAG_END}/) - kind = :tag - state = :initial - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - state = :attribute_equal - else - getch - end - - when :attribute_equal - if scan(/=/) - kind = :operator - state = :attribute_value - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - elsif scan(/./) - state = :attribute - end - - when :attribute_value - if scan(/#{ATTR_VALUE_UNQUOTED}/o) - kind = :attribute_value - state = :attribute - elsif match = scan(/["']/) - tokens << [:open, :string] - state = :attribute_value_string - plain_string_content = PLAIN_STRING_CONTENT[match] - kind = :delimiter - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - else - getch - end - - when :attribute_value_string - if scan(plain_string_content) - kind = :content - elsif scan(/['"]/) - tokens << [matched, :delimiter] - tokens << [:close, :string] - state = :attribute - next - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[\n>]/) - tokens << [:close, :string] - kind = :error - state = :initial - end - - else - raise_inspect 'Unknown state: %p' % [state], tokens - - end - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end - - if options[:keep_state] - @state = state - @plain_string_content = plain_string_content - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + # HTML Scanner + # + # $Id$ + class HTML < Scanner + + include Streamable + register_for :html + + ATTR_NAME = /[\w.:-]+/ + ATTR_VALUE_UNQUOTED = ATTR_NAME + TAG_END = /\/?>/ + HEX = /[0-9a-fA-F]/ + ENTITY = / + & + (?: + \w+ + | + \# + (?: + \d+ + | + x#{HEX}+ + ) + ) + ; + /ox + + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + + def reset + super + @state = :initial + end + + private + def setup + @state = :initial + @plain_string_content = nil + end + + def scan_tokens tokens, options + + state = @state + plain_string_content = @plain_string_content + + until eos? + + kind = nil + match = nil + + if scan(/\s+/m) + kind = :space + + else + + case state + + when :initial + if scan(//m) + kind = :comment + elsif scan(//m) + kind = :preprocessor + elsif scan(/<\?xml.*?\?>/m) + kind = :preprocessor + elsif scan(/<\?.*?\?>|<%.*?%>/m) + kind = :comment + elsif scan(/<\/[-\w_.:]*>/m) + kind = :tag + elsif match = scan(/<[-\w_.:]+>?/m) + kind = :tag + state = :attribute unless match[-1] == ?> + elsif scan(/[^<>&]+/) + kind = :plain + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[<>&]/) + kind = :error + else + raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + end + + when :attribute + if scan(/#{TAG_END}/) + kind = :tag + state = :initial + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + state = :attribute_equal + else + kind = :error + getch + end + + when :attribute_equal + if scan(/=/) + kind = :operator + state = :attribute_value + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + elsif scan(/./) + state = :attribute + end + + when :attribute_value + if scan(/#{ATTR_VALUE_UNQUOTED}/o) + kind = :attribute_value + state = :attribute + elsif match = scan(/["']/) + tokens << [:open, :string] + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + kind = :delimiter + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + else + kind = :error + getch + end + + when :attribute_value_string + if scan(plain_string_content) + kind = :content + elsif scan(/['"]/) + tokens << [matched, :delimiter] + tokens << [:close, :string] + state = :attribute + next + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[\n>]/) + tokens << [:close, :string] + kind = :error + state = :initial + end + + else + raise_inspect 'Unknown state: %p' % [state], tokens + + end + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens, state + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + end + + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb deleted file mode 100644 index 5955195..0000000 --- a/lib/coderay/scanners/nitro_html.rb +++ /dev/null @@ -1,125 +0,0 @@ -module CodeRay -module Scanners - - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class NitroHTML < Scanner - - include Streamable - register_for :nitro_html - - NITRO_RUBY_BLOCK = / - <\?r - (?> - [^\?]* - (?> \?(?!>) [^\?]* )* - ) - (?: \?> )? - | - - (?> - [^<]* - (?> <(?!\/ruby>) [^<]* )* - ) - (?: <\/ruby> )? - | - <% - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /mx - - NITRO_VALUE_BLOCK = / - \# - (?: - \{ - [^{}]* - (?> - \{ [^}]* \} - (?> [^{}]* ) - )* - \}? - | \| [^|]* \|? - | \( [^)]* \)? - | \[ [^\]]* \]? - | \\ [^\\]* \\? - ) - /x - - NITRO_ENTITY = / - % (?: \#\d+ | \w+ ) ; - / - - START_OF_RUBY = / - (?=[<\#%]) - < (?: \?r | % | ruby> ) - | \# [{(|] - | % (?: \#\d+ | \w+ ) ; - /x - - CLOSING_PAREN = Hash.new do |h, p| - h[p] = p - end.update( { - '(' => ')', - '[' => ']', - '{' => '}', - } ) - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) - start_tag = match[0,2] - delimiter = CLOSING_PAREN[start_tag[1,1]] - end_tag = match[-1,1] == delimiter ? delimiter : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) - start_tag = '' ? '?>' : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -(end_tag.size)-1] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif entity = scan(/#{NITRO_ENTITY}/o) - tokens << [entity, :entity] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end - -end -end diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb new file mode 100644 index 0000000..baef162 --- /dev/null +++ b/lib/coderay/scanners/nitro_xhtml.rb @@ -0,0 +1,130 @@ +module CodeRay +module Scanners + + load :html + load :ruby + + # Nitro XHTML Scanner + # + # $Id$ + class NitroXHTML < Scanner + + include Streamable + register_for :nitro_xhtml + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 15a7566..8afb727 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,73 @@ -module CodeRay -module Scanners - - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class RHTML < Scanner - - include Streamable - register_for :rhtml - - ERB_RUBY_BLOCK = / - <%(?!%)[=-]? - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /x - - START_OF_ERB = / - <%(?!%) - /x - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{ERB_RUBY_BLOCK}/o) - start_tag = match[/\A<%[-=]?/] - end_tag = match[/%?>?\z/] - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end - -end -end +module CodeRay +module Scanners + + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^\-%]* # normal* + (?> # special + (?: %(?!>) | -(?!%>) ) + [^\-%]* # normal* + )* + ) + (?: -?%> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/-?%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 3ce5003..76c87ca 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -1,400 +1,404 @@ -module CodeRay -module Scanners - - # This scanner is really complex, since Ruby _is_ a complex language! - # - # It tries to highlight 100% of all common code, - # and 90% of strange codes. - # - # It is optimized for HTML highlighting, and is not very useful for - # parsing or pretty printing. - # - # For now, I think it's better than the scanners in VIM or Syntax, or - # any highlighter I was able to find, except Caleb's RubyLexer. - # - # I hope it's also better than the rdoc/irb lexer. - class Ruby < Scanner - - include Streamable - - register_for :ruby - - helper :patterns - - DEFAULT_OPTIONS = { - :parse_regexps => true, - } - - private - def scan_tokens tokens, options - parse_regexp = false # options[:parse_regexps] - first_bake = saved_tokens = nil - last_token_dot = false - fancy_allowed = regexp_allowed = true - heredocs = nil - last_state = nil - state = :initial - depth = nil - states = [] - - patterns = Patterns # avoid constant lookup - - until eos? - type = :error - match = nil - kind = nil - - if state.instance_of? patterns::StringState -# {{{ - match = scan_until(state.pattern) || scan_until(/\z/) - tokens << [match, :content] unless match.empty? - break if eos? - - if state.heredoc and self[1] # end of heredoc - match = getch.to_s - match << scan_until(/$/) unless eos? - tokens << [match, :delimiter] - tokens << [:close, state.type] - state = state.next_state - next - end - - case match = getch - - when state.delim - if state.paren - state.paren_depth -= 1 - if state.paren_depth > 0 - tokens << [match, :nesting_delimiter] - next - end - end - tokens << [match, :delimiter] - if state.type == :regexp and not eos? - modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) - tokens << [modifiers, :modifier] unless modifiers.empty? - if parse_regexp - extended = modifiers.index ?x - tokens = saved_tokens - regexp = tokens - for text, type in regexp - if text.is_a? ::String - case type - when :content - text.scan(/([^#]+)|(#.*)/) do |plain, comment| - if plain - tokens << [plain, :content] - else - tokens << [comment, :comment] - end - end - when :character - if text[/\\(?:[swdSWDAzZbB]|\d+)/] - tokens << [text, :modifier] - else - tokens << [text, type] - end - else - tokens << [text, type] - end - else - tokens << [text, type] - end - end - first_bake = saved_tokens = nil - end - end - tokens << [:close, state.type] - fancy_allowed = regexp_allowed = false - state = state.next_state - - when '\\' - if state.interpreted - if esc = scan(/ #{patterns::ESCAPE} /ox) - tokens << [match + esc, :char] - else - tokens << [match, :error] - end - else - case m = getch - when state.delim, '\\' - tokens << [match + m, :char] - when nil - tokens << [match, :error] - else - tokens << [match + m, :content] - end - end - - when '#' - case peek(1)[0] - when ?{ - states.push [state, depth, heredocs] - fancy_allowed = regexp_allowed = true - state = :initial - depth = 1 - tokens << [:open, :inline] - tokens << [match + getch, :delimiter] - when ?$, ?@ - tokens << [match, :escape] - last_state = state # scan one token as normal code, then return here - state = :initial - else - raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens - end - - when state.paren - state.paren_depth += 1 - tokens << [match, :nesting_delimiter] - - when /#{patterns::REGEXP_SYMBOLS}/ox - tokens << [match, :function] - - else - raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens - - end - next -# }}} - else -# {{{ - if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or - ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) - fancy_allowed = true - case m = match[0] - when ?\s, ?\t, ?\f - match << scan(/\s*/) unless eos? or heredocs - type = :space - when ?\n, ?\\ - type = :space - if m == ?\n - regexp_allowed = true - state = :initial if state == :undef_comma_expected - end - if heredocs - unscan # heredoc scanning needs \n at start - state = heredocs.shift - tokens << [:open, state.type] - heredocs = nil if heredocs.empty? - next - else - match << scan(/\s*/) unless eos? - end - when ?#, ?=, ?_ - type = :comment - regexp_allowed = true - else - raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens - end - tokens << [match, type] - next - - elsif state == :initial - - # IDENTS # - if match = scan(/#{patterns::METHOD_NAME}/o) - if last_token_dot - type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end - else - type = patterns::IDENT_KIND[match] - if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) - type = :constant - elsif type == :reserved - state = patterns::DEF_NEW_STATE[match] - end - end - ## experimental! - fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) - - # OPERATORS # - elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or - (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) - if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ - regexp_allowed = fancy_allowed = :set - end - last_token_dot = :set if match == '.' or match == '::' - type = :operator - unless states.empty? - case match - when '{' - depth += 1 - when '}' - depth -= 1 - if depth == 0 - state, depth, heredocs = states.pop - tokens << [match, :delimiter] - type = :inline - match = :close - end - end - end - - elsif match = scan(/ ['"] /mx) - tokens << [:open, :string] - type = :delimiter - state = patterns::StringState.new :string, match == '"', match # important for streaming - - elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) - type = :instance_variable - - elsif regexp_allowed and match = scan(/\//) - tokens << [:open, :regexp] - type = :delimiter - interpreted = true - state = patterns::StringState.new :regexp, interpreted, match - if parse_regexp - tokens = [] - saved_tokens = tokens - end - - elsif match = scan(/#{patterns::NUMERIC}/o) - type = if self[1] then :float else :integer end - - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - else - type = :symbol - end - - elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) - regexp_allowed = fancy_allowed = :set - type = :operator - - elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) - indented = self[1] == '-' - quote = self[3] - delim = self[quote ? 4 : 2] - type = patterns::QUOTE_TO_TYPE[quote] - tokens << [:open, type] - tokens << [match, :delimiter] - match = :close - heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) - heredocs ||= [] # create heredocs if empty - heredocs << heredoc - - elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) - type, interpreted = *patterns::FancyStringType.fetch(self[1]) do - raise_inspect 'Unknown fancy string: %%%p' % k, tokens - end - tokens << [:open, type] - state = patterns::StringState.new type, interpreted, self[2] - type = :delimiter - - elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) - type = :integer - - elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) - regexp_allowed = fancy_allowed = :set - type = :operator - - elsif match = scan(/`/) - if last_token_dot - type = :operator - else - tokens << [:open, :shell] - type = :delimiter - state = patterns::StringState.new :shell, true, match - end - - elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) - type = :global_variable - - elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) - type = :class_variable - - else - match = getch - - end - - elsif state == :def_expected - state = :initial - if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) - type = :method - else - next - end - - elsif state == :undef_expected - state = :undef_comma_expected - if match = scan(/#{patterns::METHOD_NAME_EX}/o) - type = :method - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - state.next_state = :undef_comma_expected - else - type = :symbol - end - else - state = :initial - next - end - - elsif state == :undef_comma_expected - if match = scan(/,/) - type = :operator - state = :undef_expected - else - state = :initial - next - end - - elsif state == :module_expected - if match = scan(/< true, + } + + private + def scan_tokens tokens, options + parse_regexp = false # options[:parse_regexps] + first_bake = saved_tokens = nil + last_token_dot = false + fancy_allowed = regexp_allowed = true + heredocs = nil + last_state = nil + state = :initial + depth = nil + inline_block_stack = [] + + patterns = Patterns # avoid constant lookup + + until eos? + match = nil + kind = nil + + if state.instance_of? patterns::StringState +# {{{ + match = scan_until(state.pattern) || scan_until(/\z/) + tokens << [match, :content] unless match.empty? + break if eos? + + if state.heredoc and self[1] # end of heredoc + match = getch.to_s + match << scan_until(/$/) unless eos? + tokens << [match, :delimiter] + tokens << [:close, state.type] + state = state.next_state + next + end + + case match = getch + + when state.delim + if state.paren + state.paren_depth -= 1 + if state.paren_depth > 0 + tokens << [match, :nesting_delimiter] + next + end + end + tokens << [match, :delimiter] + if state.type == :regexp and not eos? + modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) + tokens << [modifiers, :modifier] unless modifiers.empty? + if parse_regexp + extended = modifiers.index ?x + tokens = saved_tokens + regexp = tokens + for text, kind in regexp + if text.is_a? ::String + case kind + when :content + text.scan(/([^#]+)|(#.*)/) do |plain, comment| + if plain + tokens << [plain, :content] + else + tokens << [comment, :comment] + end + end + when :character + if text[/\\(?:[swdSWDAzZbB]|\d+)/] + tokens << [text, :modifier] + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + end + first_bake = saved_tokens = nil + end + end + tokens << [:close, state.type] + fancy_allowed = regexp_allowed = false + state = state.next_state + + when '\\' + if state.interpreted + if esc = scan(/ #{patterns::ESCAPE} /ox) + tokens << [match + esc, :char] + else + tokens << [match, :error] + end + else + case m = getch + when state.delim, '\\' + tokens << [match + m, :char] + when nil + tokens << [match, :error] + else + tokens << [match + m, :content] + end + end + + when '#' + case peek(1)[0] + when ?{ + inline_block_stack << [state, depth, heredocs] + fancy_allowed = regexp_allowed = true + state = :initial + depth = 1 + tokens << [:open, :inline] + tokens << [match + getch, :delimiter] + when ?$, ?@ + tokens << [match, :escape] + last_state = state # scan one token as normal code, then return here + state = :initial + else + raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens + end + + when state.paren + state.paren_depth += 1 + tokens << [match, :nesting_delimiter] + + when /#{patterns::REGEXP_SYMBOLS}/ox + tokens << [match, :function] + + else + raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens + + end + next +# }}} + else +# {{{ + if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or + ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) + fancy_allowed = true + case m = match[0] + when ?\s, ?\t, ?\f + match << scan(/\s*/) unless eos? or heredocs + kind = :space + when ?\n, ?\\ + kind = :space + if m == ?\n + regexp_allowed = true + state = :initial if state == :undef_comma_expected + end + if heredocs + unscan # heredoc scanning needs \n at start + state = heredocs.shift + tokens << [:open, state.type] + heredocs = nil if heredocs.empty? + next + else + match << scan(/\s*/) unless eos? + end + when ?#, ?=, ?_ + kind = :comment + regexp_allowed = true + else + raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + end + tokens << [match, kind] + next + + elsif state == :initial + + # IDENTS # + if match = scan(/#{patterns::METHOD_NAME}/o) + if last_token_dot + kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end + else + kind = patterns::IDENT_KIND[match] + if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) + kind = :constant + elsif kind == :reserved + state = patterns::DEF_NEW_STATE[match] + end + end + ## experimental! + fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) + + # OPERATORS # + elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or + (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ + regexp_allowed = fancy_allowed = :set + end + last_token_dot = :set if match == '.' or match == '::' + kind = :operator + unless inline_block_stack.empty? + case match + when '{' + depth += 1 + when '}' + depth -= 1 + if depth == 0 # closing brace of inline block reached + state, depth, heredocs = inline_block_stack.pop + tokens << [match, :delimiter] + kind = :inline + match = :close + end + end + end + + elsif match = scan(/ ['"] /mx) + tokens << [:open, :string] + kind = :delimiter + state = patterns::StringState.new :string, match == '"', match # important for streaming + + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) + kind = :instance_variable + + elsif regexp_allowed and match = scan(/\//) + tokens << [:open, :regexp] + kind = :delimiter + interpreted = true + state = patterns::StringState.new :regexp, interpreted, match + if parse_regexp + tokens = [] + saved_tokens = tokens + end + + elsif match = scan(/#{patterns::NUMERIC}/o) + kind = if self[1] then :float else :integer end + + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + else + kind = :symbol + end + + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) + indented = self[1] == '-' + quote = self[3] + delim = self[quote ? 4 : 2] + kind = patterns::QUOTE_TO_TYPE[quote] + tokens << [:open, kind] + tokens << [match, :delimiter] + match = :close + heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart ) + heredocs ||= [] # create heredocs if empty + heredocs << heredoc + + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) + kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do + raise_inspect 'Unknown fancy string: %%%p' % k, tokens + end + tokens << [:open, kind] + state = patterns::StringState.new kind, interpreted, self[2] + kind = :delimiter + + elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) + kind = :integer + + elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif match = scan(/`/) + if last_token_dot + kind = :operator + else + tokens << [:open, :shell] + kind = :delimiter + state = patterns::StringState.new :shell, true, match + end + + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) + kind = :global_variable + + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) + kind = :class_variable + + else + kind = :error + match = getch + + end + + elsif state == :def_expected + state = :initial + if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) + kind = :method + else + next + end + + elsif state == :undef_expected + state = :undef_comma_expected + if match = scan(/#{patterns::METHOD_NAME_EX}/o) + kind = :method + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + state.next_state = :undef_comma_expected + else + kind = :symbol + end + else + state = :initial + next + end + + elsif state == :undef_comma_expected + if match = scan(/,/) + kind = :operator + state = :undef_expected + else + state = :initial + next + end + + elsif state == :module_expected + if match = scan(/< 1 + state = this_block.first + tokens << [:close, state.type] + end + + tokens + end + + end + +end +end + +# vim:fdm=marker -- cgit v1.2.1