diff options
Diffstat (limited to 'lib/coderay/scanner.rb')
| -rw-r--r-- | lib/coderay/scanner.rb | 298 | 
1 files changed, 298 insertions, 0 deletions
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb new file mode 100644 index 0000000..1cca607 --- /dev/null +++ b/lib/coderay/scanner.rb @@ -0,0 +1,298 @@ +module CodeRay
 +
 +  # This module holds class Scanner and its subclasses.
 +  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
 +  # and can be found in coderay/scanners/ruby.
 +  # 
 +  # Scanner also provides methods and constants for the register mechanism
 +  # and the [] method that returns the Scanner class belonging to the
 +  # given lang.
 +  module Scanners
 +
 +    # Raised if Scanners[] fails because:
 +    # * a file could not be found
 +    # * the requested Scanner is not registered
 +    #
 +    # Derives from StandardError (not Exception) so that a plain +rescue+
 +    # is able to handle it.
 +    ScannerNotFound = Class.new(StandardError)
 +
 +    # Registry of the loaded Scanners, keyed by lang; it is filled by +register+.
 +    SCANNERS = Hash.new { |h, lang|  # default block: reading a missing lang with SCANNERS[lang] raises
 +      raise ScannerNotFound, "No scanner for #{lang} found."
 +    }
 +
 +    class << self
 +
 +      # Registers a scanner class by setting SCANNERS[lang].
 +      #
 +      # Typically used in Scanners, for example in the Ruby scanner:
 +      #
 +      #   register_for :ruby
 +      def register scanner_class, *langs
 +        for lang in langs
 +          raise ArgumentError, 'lang must be a Symbol, but it was a %s' % lang.class unless lang.is_a? Symbol
 +          SCANNERS[lang] = scanner_class
 +        end
 +      end
 +
 +      # Loads the scanner class for +lang+ and returns it.
 +      #
 +      # Example:
 +      #
 +      #   Scanners[:xml].new
 +      #
 +      # +lang+ is converted using +normalize+ and must be
 +      # * a String containing only alphanumeric characters (\w+)
 +      # * a Symbol
 +      #
 +      # Strings are converted to lowercase symbols (so +'C'+ and +'c'+ load the
 +      # same scanner, namely the one registered for +:c+.)
 +      # 
 +      # If the scanner isn't registered yet, it is searched.
 +      # CodeRay expects that the scanner class is defined in
 +      #
 +      #   <install-dir>/coderay/scanners/<lang>.rb
 +      #
 +      # (See path_to.)
 +      #
 +      # If the file isn't found, a ScannerNotFound exception is raised
 +      #
 +      # The scanner should register itself using +register+. If the scanner is
 +      # still not found (because has not registered or registered under another lang),
 +      # a ScannerNotFound exception is raised.
 +      def [] lang
 +        lang = normalize lang
 +
 +        SCANNERS.fetch lang do
 +          scanner_file = path_to lang
 +
 +          begin
 +            require scanner_file
 +          rescue LoadError
 +            raise ScannerNotFound, "File #{scanner_file} not found."
 +          end
 +
 +          SCANNERS.fetch lang do
 +            raise ScannerNotFound, <<-ERR
 +No scanner for #{lang} found in #{scanner_file}.
 +Known scanners: #{SCANNERS}
 +            ERR
 +          end
 +        end
 +      end
 +
 +      # Alias for +[]+.
 +      alias load []
 +
 +      # Calculates the path where a scanner for +lang+
 +      # is expected to be. This is:
 +      # 
 +      #   <install-dir>/coderay/scanners/<lang>.rb
 +      def path_to lang
 +        File.join 'coderay', 'scanners', "#{lang}.rb"
 +      end
 +
 +      # Returns an array of all filenames in the scanners/ folder.
 +      # The extension +.rb+ is not included.
 +      def languages
 +        scanners = File.join File.dirname(__FILE__), 'scanners', '*.rb'
 +        Dir[scanners].map do |file|
 +          File.basename file, '.rb'
 +        end
 +      end
 +
 +      # Loads all scanners that +languages+ finds using +load+.
 +      def load_all
 +        for lang in languages
 +          load lang
 +        end
 +      end
 +
 +      # Converts +lang+ to a downcase Symbol if it is a String,
 +      # or returns +lang+ if it already is a Symbol.
 +      #
 +      # Raises +ArgumentError+ for all other objects, or if the
 +      # given String includes non-alphanumeric characters (\W).
 +      def normalize lang
 +        if lang.is_a? Symbol
 +          lang
 +        elsif lang.is_a? String
 +          if lang[/\w+/] == lang
 +            lang[/\w+/].downcase.to_sym
 +          else
 +            raise ArgumentError, "Invalid lang: '#{lang}' given."
 +          end
 +        elsif lang.nil?
 +          :plaintext
 +        else
 +          raise ArgumentError, "String or Symbol expected, but #{lang.class} given."
 +        end
 +      end
 +
 +    end
 +
 +
 +    require 'strscan'
 +    # The base class for all Scanners.
 +    #
 +    # It is a subclass of Ruby's great +StringScanner+, which
 +    # makes it easy to access the scanning methods inside.
 +    #
 +    # It is also +Enumerable+, so you can do this:
 +    #
 +    #   require 'coderay'
 +    #   
 +    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
 +    #   
 +    #   for text, kind in c_scanner
 +    #     puts text if kind == :operator
 +    #   end
 +    #   
 +    #   # prints: (*==)++;
 +    # 
 +    # OK, this is not a very good example :)
 +    # You can also use map, any?, find and even sort_by.
 +    class Scanner < StringScanner
 +
 +      # Raised if a Scanner fails while scanning.
 +      #
 +      # Derives from StandardError (not Exception) so that a plain +rescue+
 +      # is able to handle it.
 +      ScanError = Class.new(StandardError)
 +
 +      require 'coderay/helpers/scanner_helper'
 +
 +      # The default options for all scanner classes.
 +      # 
 +      # +initialize+ reads self.class::DEFAULT_OPTIONS, so a subclass changes
 +      # its defaults by defining its own DEFAULT_OPTIONS constant.
 +      DEFAULT_OPTIONS = { :stream => false }
 +
 +      class << self
 +        # Registers this scanner class for all +langs+
 +        # by passing itself to Scanners.register.
 +        #
 +        # See Scanners.register.
 +        def register_for *langs
 +          Scanners.register self, *langs
 +        end
 +
 +        # Returns whether the Scanner can be used in streaming mode.
 +        def streamable?
 +          is_a? Streamable  # NOTE(review): this tests the class object itself, while initialize tests the instance (kind_of? Streamable) - confirm which is intended
 +        end
 +        
 +      end
 +
 +=begin
 +      ## Excluded for speed reasons - protected seems to make methods slow.
 +
 +      # Save the StringScanner methods from being called.
 +      # This would not be useful for highlighting.
 +      strscan_public_methods = StringScanner.instance_methods - StringScanner.ancestors[1].instance_methods
 +      protected(*strscan_public_methods)
 +=end
 +      # Creates a new Scanner; raises a RuntimeError if called on Scanner itself.
 +      #
 +      # * +code+ is the input String; its "\r\n" and "\r" line endings are
 +      #   converted to "\n" before it is handed to the superclass StringScanner.
 +      # * +options+ is a Hash with Symbols as keys. It is merged over the
 +      #   DEFAULT_OPTIONS of the class, so it can overwrite default options.
 +      # * +block+ is the callback for streamed highlighting.
 +      #
 +      # If :stream is set in the options, the Scanner must be Streamable
 +      # (otherwise NotStreamableError is raised) and uses a TokenStream with
 +      # the +block+ as callback to handle the tokens.
 +      # Otherwise, a Tokens object is used.
 +      def initialize code, options = {}, &block
 +        @options = self.class::DEFAULT_OPTIONS.merge options
 +        raise "I am only the basic Scanner class. I can't scan anything. :(\n" + 
 +          "Use my subclasses." if self.class == Scanner
 +
 +        # Normalize DOS ("\r\n") and old Mac ("\r") newlines to Unix ("\n") newlines.
 +        super code.gsub(/\r\n?/, "\n")
 +
 +        if @options[:stream]
 +          warn "warning in CodeRay::Scanner.new: :stream is set, but no block was given" unless block_given?
 +          raise NotStreamableError, self unless kind_of? Streamable
 +          @tokens = TokenStream.new(&block)
 +        else
 +          warn "warning in CodeRay::Scanner.new: Block given, but :stream is #{@options[:stream]}" if block_given?
 +          @tokens = Tokens.new
 +        end
 +      end
 +
 +      # More mnemonic accessor name for the input string.
 +      alias code string
 +
 +      # Scans the code and returns all tokens in a Tokens object.
 +      def tokenize options = {}
 +        options = @options.merge({}) #options
 +        if @options[:stream]  # :stream must have been set already
 +          reset ## what is this for?
 +          scan_tokens @tokens, options
 +          @tokens
 +        else
 +          @cached_tokens ||= scan_tokens @tokens, options
 +        end
 +      end
 +
 +      # you can also see this as a read-only attribute
 +      alias tokens tokenize
 +
 +      # Yields every token of +tokens+ to the block; raises ArgumentError if :stream is set.
 +      def each &block
 +        raise ArgumentError, 'Cannot traverse TokenStream.' if @options[:stream]
 +        tokens.each(&block)  # +tokens+ is an alias for +tokenize+
 +      end
 +      include Enumerable
 +
 +      # The current line position of the scanner.
 +      #
 +      # Beware, this is implemented inefficiently. It should be used
 +      # for debugging only.
 +      def line
 +        string[0..pos].count("\n") + 1
 +      end
 +      
 +    protected
 +
 +      # The central scanning method, and often the only one a subclass implements.
 +      # 
 +      # Subclasses must override it; it must return +tokens+ and must only use
 +      # Tokens#<< for storing scanned tokens. This default raises NotImplementedError.
 +      def scan_tokens tokens, options
 +        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
 +      end
 +
 +      # Scanner error with additional status information
 +      def raise_inspect msg, tokens, ambit = 30
 +        raise ScanError, <<-EOE % [
 +
 +
 +***ERROR in %s: %s
 +
 +tokens:
 +%s
 +
 +current line: %d  pos = %d
 +matched: %p
 +bol? = %p,  eos? = %p
 +
 +surrounding code:
 +%p  ~~  %p
 +
 +
 +***ERROR***
 +
 +        EOE
 +          File.basename(caller[0]),
 +          msg,
 +          tokens.last(10).map { |t| t.inspect }.join("\n"),
 +          line, pos,
 +          matched, bol?, eos?,
 +          string[pos-ambit,ambit],
 +          string[pos,ambit],
 +        ]
 +      end
 +
 +    end
 +
 +  end
 +end
 +
 +# vim:sw=2:ts=2:et:tw=78
  | 
