diff options
author | Kornelius Kalnbach <murphy@rubychan.de> | 2011-08-19 03:09:35 +0200 |
---|---|---|
committer | Kornelius Kalnbach <murphy@rubychan.de> | 2011-08-19 03:09:35 +0200 |
commit | 75bc5455af8c3c3381066aac3d5fff42264cac6f (patch) | |
tree | 589d2c912ddd94c517eb794bcdf3257f8348f3c0 /lib/coderay/scanner.rb | |
parent | fdd17b6a09efb275238a3baef275465d31452f2a (diff) | |
download | coderay-75bc5455af8c3c3381066aac3d5fff42264cac6f.tar.gz |
Major rewrite of encoders to support IO output; fixed some minor scanner bugs; cleanups; dropped NitroXHTML scanner; improved tests
Diffstat (limited to 'lib/coderay/scanner.rb')
-rw-r--r-- | lib/coderay/scanner.rb | 48 |
1 files changed, 30 insertions, 18 deletions
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb index 0e0723c..e638c2c 100644 --- a/lib/coderay/scanner.rb +++ b/lib/coderay/scanner.rb @@ -74,7 +74,7 @@ module CodeRay if code.respond_to? :encoding code = encode_with_encoding code, self.encoding else - code = to_unix code if code.index ?\r + code = to_unix code end # code = code.dup if code.eql? original code @@ -100,7 +100,7 @@ module CodeRay def encode_with_encoding code, target_encoding if code.encoding == target_encoding if code.valid_encoding? - return to_unix(code) + return to_unix code else source_encoding = guess_encoding code end @@ -112,7 +112,7 @@ module CodeRay end def to_unix code - code.gsub(/\r\n?/, "\n") + code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code end def guess_encoding s @@ -221,27 +221,39 @@ module CodeRay end include Enumerable - # The current line position of the scanner. See also #column. + # The current line position of the scanner, starting with 1. + # See also: #column. # # Beware, this is implemented inefficiently. It should be used # for debugging only. - def line - string[0..pos].count("\n") + 1 + def line pos = self.pos + return 1 if pos <= 0 + binary_string[0...pos].count("\n") + 1 end - # The current column position of the scanner. See also #line. + # The current column position of the scanner, starting with 1. + # See also: #line. # # Beware, this is implemented inefficiently. It should be used # for debugging only. def column pos = self.pos - return 0 if pos <= 0 - string = self.string - if string.respond_to?(:bytesize) && string.bytesize != string.size - #:nocov: - string = string.dup.force_encoding('binary') - #:nocov: - end - pos - (string.rindex(?\n, pos) || 0) + return 1 if pos <= 0 + pos - (binary_string.rindex(?\n, pos - 1) || -1) + end + + # The string in binary encoding. + # + # To be used with #pos, which is the index of the byte the scanner + # will scan next. + def binary_string + @binary_string ||= + if string.respond_to?(:bytesize) && string.bytesize != string.size + #:nocov: + string.dup.force_encoding('binary') + #:nocov: + else + string + end end protected @@ -267,7 +279,7 @@ module CodeRay def reset_instance @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens] @cached_tokens = nil - @bin_string = nil if defined? @bin_string + @binary_string = nil if defined? @binary_string end # Scanner error with additional status information @@ -297,8 +309,8 @@ surrounding code: tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '', line, column, pos, matched, state, bol?, eos?, - string[pos - ambit, ambit], - string[pos, ambit], + binary_string[pos - ambit, ambit], + binary_string[pos, ambit], ], backtrace end |