summaryrefslogtreecommitdiff
path: root/lib/coderay/scanner.rb
diff options
context:
space:
mode:
author Kornelius Kalnbach <murphy@rubychan.de> 2013-10-22 01:11:31 +0200
committer Kornelius Kalnbach <murphy@rubychan.de> 2013-10-22 01:16:32 +0200
commit b09e97b08c3c073e79159ff09f6a7e0779fcfd2e (patch)
tree 03760e46268a69bdc67418afa34892a0f99040fc /lib/coderay/scanner.rb
parent e93aae88985667189bb5b24ad0d5f54cb5fdba70 (diff)
download coderay-b09e97b08c3c073e79159ff09f6a7e0779fcfd2e.tar.gz
use autoload again
Diffstat (limited to 'lib/coderay/scanner.rb')
-rw-r--r--lib/coderay/scanner.rb355
1 files changed, 0 insertions, 355 deletions
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
deleted file mode 100644
index b3f7e17..0000000
--- a/lib/coderay/scanner.rb
+++ /dev/null
@@ -1,355 +0,0 @@
-# encoding: utf-8
-require 'strscan'
-
-module CodeRay
-
- autoload :WordList, coderay_path('helpers', 'word_list')
-
- # = Scanners
- #
- # This module holds the Scanner class and its subclasses.
- # For example, the Ruby scanner is named CodeRay::Scanners::Ruby and
- # can be found in coderay/scanners/ruby.
- #
- # Scanner also provides methods and constants for the register
- # mechanism and the [] method that returns the Scanner class
- # belonging to the given lang.
- #
- # See PluginHost.
- module Scanners
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'scanners'
-
-
- # = Scanner
- #
- # The base class for all Scanners.
- #
- # It is a subclass of Ruby's great +StringScanner+, which
- # makes it easy to access the scanning methods inside.
- #
- # It is also +Enumerable+, so you can use it like an Array of
- # Tokens:
- #
- # require 'coderay'
- #
- # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
- #
- # for text, kind in c_scanner
- # puts text if kind == :operator
- # end
- #
- # # prints: (*==)++;
- #
- # OK, this is a very simple example :)
- # You can also use +map+, +any?+, +find+ and even +sort_by+,
- # if you want.
- class Scanner < StringScanner
-
- extend Plugin
- plugin_host Scanners
-
- # Raised if a Scanner fails while scanning
- ScanError = Class.new StandardError
-
- # The default options for all scanner classes.
- #
- # Define @default_options for subclasses.
- DEFAULT_OPTIONS = { }
-
- KINDS_NOT_LOC = [:comment, :doctype, :docstring]
-
- attr_accessor :state
-
- class << self
-
- # Normalizes the given code into a string with UNIX newlines, in the
- # scanner's internal encoding, with invalid and undefined characters
- # replaced by placeholders. May return the given object itself if nothing had to be changed.
- def normalize code
- # original = code
- code = code.to_s unless code.is_a? ::String
- return code if code.empty?
-
- if code.respond_to? :encoding
- code = encode_with_encoding code, self.encoding
- else
- code = to_unix code
- end
- # code = code.dup if code.eql? original
- code
- end
-
- # The typical filename suffix for this scanner's language.
- def file_extension extension = lang
- @file_extension ||= extension.to_s
- end
-
- # The encoding used internally by this scanner.
- def encoding name = 'UTF-8'
- @encoding ||= defined?(Encoding.find) && Encoding.find(name)
- end
-
- # The lang of this Scanner class, which is equal to its Plugin ID.
- def lang
- @plugin_id
- end
-
- protected
-
- def encode_with_encoding code, target_encoding
- if code.encoding == target_encoding
- if code.valid_encoding?
- return to_unix(code)
- else
- source_encoding = guess_encoding code
- end
- else
- source_encoding = code.encoding
- end
- # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
- code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
- end
-
- def to_unix code
- code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
- end
-
- def guess_encoding s
- #:nocov:
- IO.popen("file -b --mime -", "w+") do |file|
- file.write s[0, 1024]
- file.close_write
- begin
- Encoding.find file.gets[/charset=([-\w]+)/, 1]
- rescue ArgumentError
- Encoding::BINARY
- end
- end
- #:nocov:
- end
-
- end
-
- # Create a new Scanner.
- #
- # * +code+ is the input String and is handled by the superclass
- # StringScanner.
- # * +options+ is a Hash with Symbols as keys.
- # It is merged with the default options of the class (you can
- # overwrite default options here.)
- #
- # If options[:tokens] is given, it is used to store the tokens. Else, a new Tokens object is used.
- def initialize code = '', options = {}
- if self.class == Scanner
- raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
- end
-
- @options = self.class::DEFAULT_OPTIONS.merge options
-
- super self.class.normalize(code)
-
- @tokens = options[:tokens] || Tokens.new
- @tokens.scanner = self if @tokens.respond_to? :scanner=
-
- setup
- end
-
- # Sets back the scanner. Subclasses should redefine the reset_instance
- # method instead of this one.
- def reset
- super
- reset_instance
- end
-
- # Set a new string to be scanned.
- def string= code
- code = self.class.normalize(code)
- super code
- reset_instance
- end
-
- # the Plugin ID for this scanner
- def lang
- self.class.lang
- end
-
- # the default file extension for this scanner
- def file_extension
- self.class.file_extension
- end
-
- # Scans the code and returns all tokens in a Tokens object.
- def tokenize source = nil, options = {}
- options = @options.merge(options)
-
- set_tokens_from_options options
- set_string_from_source source
-
- begin
- scan_tokens @tokens, options
- rescue => e
- message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
- raise_inspect e.message, @tokens, message, 30, e.backtrace
- end
-
- @cached_tokens = @tokens
- if source.is_a? Array
- @tokens.split_into_parts(*source.map { |part| part.size })
- else
- @tokens
- end
- end
-
- # Cache the result of tokenize.
- def tokens
- @cached_tokens ||= tokenize
- end
-
- # Traverse the tokens.
- def each &block
- tokens.each(&block)
- end
- include Enumerable
-
- # The current line position of the scanner, starting with 1.
- # See also: #column.
- #
- # Beware, this is implemented inefficiently. It should be used
- # for debugging only.
- def line pos = self.pos
- return 1 if pos <= 0
- binary_string[0...pos].count("\n") + 1
- end
-
- # The current column position of the scanner, starting with 1.
- # See also: #line.
- def column pos = self.pos
- return 1 if pos <= 0
- pos - (binary_string.rindex(?\n, pos - 1) || -1)
- end
-
- # The string in binary encoding.
- #
- # To be used with #pos, which is the index of the byte the scanner
- # will scan next.
- def binary_string
- @binary_string ||=
- if string.respond_to?(:bytesize) && string.bytesize != string.size
- #:nocov:
- string.dup.force_encoding('binary')
- #:nocov:
- else
- string
- end
- end
-
- protected
-
- # Can be implemented by subclasses to do some initialization
- # that has to be done once per instance.
- #
- # Use reset_instance for initialization that has to be done once per
- # scan.
- def setup # :doc:
- end
-
- def set_string_from_source source
- case source
- when Array
- self.string = self.class.normalize(source.join)
- when nil
- reset
- else
- self.string = self.class.normalize(source)
- end
- end
-
- def set_tokens_from_options options
- @tokens = options[:tokens] || @tokens || Tokens.new
- @tokens.scanner = self if @tokens.respond_to? :scanner=
- end
-
- # This is the central method, and commonly the only one a
- # subclass implements.
- #
- # Subclasses must implement this method; it must return +tokens+
- # and must only use Tokens#<< for storing scanned tokens!
- def scan_tokens tokens, options # :doc:
- raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
- end
-
- # Resets the scanner.
- def reset_instance
- @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
- @cached_tokens = nil
- @binary_string = nil if defined? @binary_string
- end
-
- SCAN_ERROR_MESSAGE = <<-MESSAGE
-
-
-***ERROR in %s: %s (after %s tokens)
-
-tokens:
-%s
-
-%s
-
-surrounding code:
-%p ~~ %p
-
-
-***ERROR***
-
- MESSAGE
-
- def raise_inspect_arguments message, tokens, state, ambit
- return File.basename(caller[0]),
- message,
- tokens_size(tokens),
- tokens_last(tokens, 10).map(&:inspect).join("\n"),
- scanner_state_info(state),
- binary_string[pos - ambit, ambit],
- binary_string[pos, ambit]
- end
-
- SCANNER_STATE_INFO = <<-INFO
-current line: %d column: %d pos: %d
-matched: %p state: %p
-bol?: %p, eos?: %p
- INFO
-
- def scanner_state_info state
- SCANNER_STATE_INFO % [
- line, column, pos,
- matched, state || 'No state given!',
- bol?, eos?,
- ]
- end
-
- # Scanner error with additional status information
- def raise_inspect message, tokens, state = self.state, ambit = 30, backtrace = caller
- raise ScanError, SCAN_ERROR_MESSAGE % raise_inspect_arguments(message, tokens, state, ambit), backtrace
- end
-
- def tokens_size tokens
- tokens.size if tokens.respond_to?(:size)
- end
-
- def tokens_last tokens, n
- tokens.respond_to?(:last) ? tokens.last(n) : []
- end
-
- # Shorthand for scan_until(/\z/).
- # This method also avoids a JRuby 1.9 mode bug.
- def scan_rest
- rest = self.rest
- terminate
- rest
- end
-
- end
-
- end
-end