From c6baf98c3e5ce3fdc9884968ba1a06fbe29ce49f Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 7 Mar 2011 22:36:53 +0000 Subject: lots of fixes and improvements to the Ruby scanner --- lib/coderay/scanners/ruby/string_state.rb | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 lib/coderay/scanners/ruby/string_state.rb (limited to 'lib/coderay/scanners/ruby/string_state.rb') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb new file mode 100644 index 0000000..14127c6 --- /dev/null +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -0,0 +1,71 @@ +# encoding: utf-8 +module CodeRay +module Scanners + + class Ruby + + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, + :opening_paren, :paren_depth, :pattern, :next_state # :nodoc: all + + CLOSING_PAREN = Hash[ *%w[ + ( ) + [ ] + < > + { } + ] ].each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << + + STRING_PATTERN = Hash.new do |h, k| + delim, interpreted = *k + # delim = delim.dup # workaround for old Ruby + delim_pattern = Regexp.escape(delim) + if closing_paren = CLOSING_PAREN[delim] + delim_pattern << Regexp.escape(closing_paren) + end + delim_pattern << '\\\\' unless delim == '\\' + + # special_escapes = + # case interpreted + # when :regexp_symbols + # '| [|?*+(){}\[\].^$]' + # end + + h[k] = + if interpreted && delim != '#' + / (?= [#{delim_pattern}] | \# [{$@] ) /mx + else + / (?= [#{delim_pattern}] ) /mx + end + end + + def initialize kind, interpreted, delim, heredoc = false + if heredoc + pattern = heredoc_pattern delim, interpreted, heredoc == :indented + delim = nil + else + pattern = STRING_PATTERN[ [delim, interpreted] ] + if closing_paren = CLOSING_PAREN[delim] + opening_paren = delim + delim = closing_paren + paren_depth = 1 + end + end + super kind, interpreted, delim, heredoc, opening_paren, paren_depth, pattern, :initial + end + + def heredoc_pattern delim, interpreted, indented + # delim = delim.dup # workaround for old Ruby + delim_pattern = Regexp.escape(delim) + delim_pattern = / \n #{ '(?>[ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x + if interpreted + / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc + else + / (?= #{delim_pattern}() | \\ ) /mx + end + end + + end + + end + +end +end -- cgit v1.2.1