From b09e97b08c3c073e79159ff09f6a7e0779fcfd2e Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Tue, 22 Oct 2013 01:11:31 +0200 Subject: use autoload again --- lib/coderay/encoder.rb | 201 --------------------- lib/coderay/encoders.rb | 18 ++ lib/coderay/encoders/encoder.rb | 190 ++++++++++++++++++++ lib/coderay/encoders/html.rb | 2 +- lib/coderay/helpers/plugin.rb | 219 ----------------------- lib/coderay/helpers/plugin_host.rb | 223 +++++++++++++++++++++++ lib/coderay/scanner.rb | 355 ------------------------------------- lib/coderay/scanners.rb | 23 +++ lib/coderay/scanners/java.rb | 2 +- lib/coderay/scanners/scanner.rb | 337 +++++++++++++++++++++++++++++++++++ lib/coderay/style.rb | 23 --- lib/coderay/styles.rb | 11 ++ lib/coderay/styles/style.rb | 18 ++ lib/coderay/tokens.rb | 2 +- 14 files changed, 823 insertions(+), 801 deletions(-) delete mode 100644 lib/coderay/encoder.rb create mode 100644 lib/coderay/encoders.rb create mode 100644 lib/coderay/encoders/encoder.rb create mode 100644 lib/coderay/helpers/plugin_host.rb delete mode 100644 lib/coderay/scanner.rb create mode 100644 lib/coderay/scanners.rb create mode 100644 lib/coderay/scanners/scanner.rb delete mode 100644 lib/coderay/style.rb create mode 100644 lib/coderay/styles.rb create mode 100644 lib/coderay/styles/style.rb (limited to 'lib/coderay') diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb deleted file mode 100644 index d2d6c7e..0000000 --- a/lib/coderay/encoder.rb +++ /dev/null @@ -1,201 +0,0 @@ -module CodeRay - - # This module holds the Encoder class and its subclasses. - # For example, the HTML encoder is named CodeRay::Encoders::HTML - # can be found in coderay/encoders/html. - # - # Encoders also provides methods and constants for the register - # mechanism and the [] method that returns the Encoder class - # belonging to the given format. 
- module Encoders - - extend PluginHost - plugin_path File.dirname(__FILE__), 'encoders' - - # = Encoder - # - # The Encoder base class. Together with Scanner and - # Tokens, it forms the highlighting triad. - # - # Encoder instances take a Tokens object and do something with it. - # - # The most common Encoder is surely the HTML encoder - # (CodeRay::Encoders::HTML). It highlights the code in a colorful - # html page. - # If you want the highlighted code in a div or a span instead, - # use its subclasses Div and Span. - class Encoder - extend Plugin - plugin_host Encoders - - class << self - - # If FILE_EXTENSION isn't defined, this method returns the - # downcase class name instead. - def const_missing sym - if sym == :FILE_EXTENSION - (defined?(@plugin_id) && @plugin_id || name[/\w+$/].downcase).to_s - else - super - end - end - - # The default file extension for output file of this encoder class. - def file_extension - self::FILE_EXTENSION - end - - end - - # Subclasses are to store their default options in this constant. - DEFAULT_OPTIONS = { } - - # The options you gave the Encoder at creating. - attr_accessor :options, :scanner - - # Creates a new Encoder. - # +options+ is saved and used for all encode operations, as long - # as you don't overwrite it there by passing additional options. - # - # Encoder objects provide three encode methods: - # - encode simply takes a +code+ string and a +lang+ - # - encode_tokens expects a +tokens+ object instead - # - # Each method has an optional +options+ parameter. These are - # added to the options you passed at creation. - def initialize options = {} - @options = self.class::DEFAULT_OPTIONS.merge options - @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN = false - end - - # Encode a Tokens object. - def encode_tokens tokens, options = {} - options = @options.merge options - @scanner = tokens.scanner if tokens.respond_to? 
:scanner - setup options - compile tokens, options - finish options - end - - # Encode the given +code+ using the Scanner for +lang+. - def encode code, lang, options = {} - options = @options.merge options - @scanner = Scanners[lang].new code, CodeRay.get_scanner_options(options).update(:tokens => self) - setup options - @scanner.tokenize - finish options - end - - # You can use highlight instead of encode, if that seems - # more clear to you. - alias highlight encode - - # The default file extension for this encoder. - def file_extension - self.class.file_extension - end - - def << token - unless @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN - warn 'Using old Tokens#<< interface.' - @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN = true - end - self.token(*token) - end - - # Called with +content+ and +kind+ of the currently scanned token. - # For simple scanners, it's enougth to implement this method. - # - # By default, it calls text_token, begin_group, end_group, begin_line, - # or end_line, depending on the +content+. - def token content, kind - case content - when String - text_token content, kind - when :begin_group - begin_group kind - when :end_group - end_group kind - when :begin_line - begin_line kind - when :end_line - end_line kind - else - raise ArgumentError, 'Unknown token content type: %p, kind = %p' % [content, kind] - end - end - - # Called for each text token ([text, kind]), where text is a String. - def text_token text, kind - @out << text - end - - # Starts a token group with the given +kind+. - def begin_group kind - end - - # Ends a token group with the given +kind+. - def end_group kind - end - - # Starts a new line token group with the given +kind+. - def begin_line kind - end - - # Ends a new line token group with the given +kind+. - def end_line kind - end - - protected - - # Called with merged options before encoding starts. - # Sets @out to an empty string. - # - # See the HTML Encoder for an example of option caching. 
- def setup options - @out = get_output(options) - end - - def get_output options - options[:out] || '' - end - - # Append data.to_s to the output. Returns the argument. - def output data - @out << data.to_s - data - end - - # Called with merged options after encoding starts. - # The return value is the result of encoding, typically @out. - def finish options - @out - end - - # Do the encoding. - # - # The already created +tokens+ object must be used; it must be a - # Tokens object. - def compile tokens, options = {} - content = nil - for item in tokens - if item.is_a? Array - raise ArgumentError, 'Two-element array tokens are no longer supported.' - end - if content - token content, item - content = nil - else - content = item - end - end - raise 'odd number list for Tokens' if content - end - - alias tokens compile - public :tokens - - end - - end -end diff --git a/lib/coderay/encoders.rb b/lib/coderay/encoders.rb new file mode 100644 index 0000000..6599186 --- /dev/null +++ b/lib/coderay/encoders.rb @@ -0,0 +1,18 @@ +module CodeRay + + # This module holds the Encoder class and its subclasses. + # For example, the HTML encoder is named CodeRay::Encoders::HTML + # can be found in coderay/encoders/html. + # + # Encoders also provides methods and constants for the register + # mechanism and the [] method that returns the Encoder class + # belonging to the given format. + module Encoders + + extend PluginHost + plugin_path File.dirname(__FILE__), 'encoders' + + autoload :Encoder, CodeRay.coderay_path('encoders', 'encoder') + + end +end diff --git a/lib/coderay/encoders/encoder.rb b/lib/coderay/encoders/encoder.rb new file mode 100644 index 0000000..fa5695d --- /dev/null +++ b/lib/coderay/encoders/encoder.rb @@ -0,0 +1,190 @@ +module CodeRay + module Encoders + + # = Encoder + # + # The Encoder base class. Together with Scanner and + # Tokens, it forms the highlighting triad. + # + # Encoder instances take a Tokens object and do something with it. 
+ # + # The most common Encoder is surely the HTML encoder + # (CodeRay::Encoders::HTML). It highlights the code in a colorful + # html page. + # If you want the highlighted code in a div or a span instead, + # use its subclasses Div and Span. + class Encoder + extend Plugin + plugin_host Encoders + + class << self + + # If FILE_EXTENSION isn't defined, this method returns the + # downcase class name instead. + def const_missing sym + if sym == :FILE_EXTENSION + (defined?(@plugin_id) && @plugin_id || name[/\w+$/].downcase).to_s + else + super + end + end + + # The default file extension for output file of this encoder class. + def file_extension + self::FILE_EXTENSION + end + + end + + # Subclasses are to store their default options in this constant. + DEFAULT_OPTIONS = { } + + # The options you gave the Encoder at creating. + attr_accessor :options, :scanner + + # Creates a new Encoder. + # +options+ is saved and used for all encode operations, as long + # as you don't overwrite it there by passing additional options. + # + # Encoder objects provide three encode methods: + # - encode simply takes a +code+ string and a +lang+ + # - encode_tokens expects a +tokens+ object instead + # + # Each method has an optional +options+ parameter. These are + # added to the options you passed at creation. + def initialize options = {} + @options = self.class::DEFAULT_OPTIONS.merge options + @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN = false + end + + # Encode a Tokens object. + def encode_tokens tokens, options = {} + options = @options.merge options + @scanner = tokens.scanner if tokens.respond_to? :scanner + setup options + compile tokens, options + finish options + end + + # Encode the given +code+ using the Scanner for +lang+. 
+ def encode code, lang, options = {} + options = @options.merge options + @scanner = Scanners[lang].new code, CodeRay.get_scanner_options(options).update(:tokens => self) + setup options + @scanner.tokenize + finish options + end + + # You can use highlight instead of encode, if that seems + # more clear to you. + alias highlight encode + + # The default file extension for this encoder. + def file_extension + self.class.file_extension + end + + def << token + unless @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN + warn 'Using old Tokens#<< interface.' + @@CODERAY_TOKEN_INTERFACE_DEPRECATION_WARNING_GIVEN = true + end + self.token(*token) + end + + # Called with +content+ and +kind+ of the currently scanned token. + # For simple scanners, it's enough to implement this method. + # + # By default, it calls text_token, begin_group, end_group, begin_line, + # or end_line, depending on the +content+. + def token content, kind + case content + when String + text_token content, kind + when :begin_group + begin_group kind + when :end_group + end_group kind + when :begin_line + begin_line kind + when :end_line + end_line kind + else + raise ArgumentError, 'Unknown token content type: %p, kind = %p' % [content, kind] + end + end + + # Called for each text token ([text, kind]), where text is a String. + def text_token text, kind + @out << text + end + + # Starts a token group with the given +kind+. + def begin_group kind + end + + # Ends a token group with the given +kind+. + def end_group kind + end + + # Starts a new line token group with the given +kind+. + def begin_line kind + end + + # Ends a new line token group with the given +kind+. + def end_line kind + end + + protected + + # Called with merged options before encoding starts. + # Sets @out to an empty string. + # + # See the HTML Encoder for an example of option caching. 
+ def setup options + @out = get_output(options) + end + + def get_output options + options[:out] || '' + end + + # Append data.to_s to the output. Returns the argument. + def output data + @out << data.to_s + data + end + + # Called with merged options after encoding starts. + # The return value is the result of encoding, typically @out. + def finish options + @out + end + + # Do the encoding. + # + # The already created +tokens+ object must be used; it must be a + # Tokens object. + def compile tokens, options = {} + content = nil + for item in tokens + if item.is_a? Array + raise ArgumentError, 'Two-element array tokens are no longer supported.' + end + if content + token content, item + content = nil + else + content = item + end + end + raise 'odd number list for Tokens' if content + end + + alias tokens compile + public :tokens + + end + + end +end diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index d2ebb5a..093df08 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -289,7 +289,7 @@ module Encoders Hash.new do |h, kinds| begin css_class = css_class_for_kinds(kinds) - title = HTML.token_path_to_hint hint, kinds if hint + title = Html.token_path_to_hint hint, kinds if hint if css_class || title if method == :style diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index 9a724ff..4567943 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -1,224 +1,5 @@ module CodeRay - # = PluginHost - # - # A simple subclass/subfolder plugin system. 
- # - # Example: - # class Generators - # extend PluginHost - # plugin_path 'app/generators' - # end - # - # class Generator - # extend Plugin - # PLUGIN_HOST = Generators - # end - # - # class FancyGenerator < Generator - # register_for :fancy - # end - # - # Generators[:fancy] #-> FancyGenerator - # # or - # CodeRay.require_plugin 'Generators/fancy' - # # or - # Generators::Fancy - module PluginHost - - # Raised if Encoders::[] fails because: - # * a file could not be found - # * the requested Plugin is not registered - PluginNotFound = Class.new LoadError - HostNotFound = Class.new LoadError - - PLUGIN_HOSTS = [] - PLUGIN_HOSTS_BY_ID = {} # dummy hash - - # Loads all plugins using list and load. - def load_all - for plugin in list - load plugin - end - end - - # Returns the Plugin for +id+. - # - # Example: - # yaml_plugin = MyPluginHost[:yaml] - def [] id, *args, &blk - plugin = validate_id(id) - begin - plugin = plugin_hash.[](plugin, *args, &blk) - end while plugin.is_a? String - plugin - end - - alias load [] - - # Tries to +load+ the missing plugin by translating +const+ to the - # underscore form (eg. LinesOfCode becomes lines_of_code). - def const_missing const - id = const.to_s. - gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). - gsub(/([a-z\d])([A-Z])/,'\1_\2'). - downcase - load id - end - - class << self - - # Adds the module/class to the PLUGIN_HOSTS list. - def extended mod - PLUGIN_HOSTS << mod - end - - end - - # The path where the plugins can be found. - def plugin_path *args - unless args.empty? - @plugin_path = File.expand_path File.join(*args) - end - @plugin_path ||= '' - end - - # Map a plugin_id to another. - # - # Usage: Put this in a file plugin_path/_map.rb. - # - # class MyColorHost < PluginHost - # map :navy => :dark_blue, - # :maroon => :brown, - # :luna => :moon - # end - def map hash - for from, to in hash - from = validate_id from - to = validate_id to - plugin_hash[from] = to unless plugin_hash.has_key? 
from - end - end - - # Define the default plugin to use when no plugin is found - # for a given id, or return the default plugin. - # - # See also map. - # - # class MyColorHost < PluginHost - # map :navy => :dark_blue - # default :gray - # end - # - # MyColorHost.default # loads and returns the Gray plugin - def default id = nil - if id - id = validate_id id - raise "The default plugin can't be named \"default\"." if id == :default - plugin_hash[:default] = id - else - load :default - end - end - - # Every plugin must register itself for +id+ by calling register_for, - # which calls this method. - # - # See Plugin#register_for. - def register plugin, id - plugin_hash[validate_id(id)] = plugin - end - - # A Hash of plugion_id => Plugin pairs. - def plugin_hash - @plugin_hash ||= (@plugin_hash = make_plugin_hash).tap { load_plugin_map } - end - - # Returns an array of all .rb files in the plugin path. - # - # The extension .rb is not included. - def list - Dir[path_to('*')].select do |file| - File.basename(file)[/^(?!_)\w+\.rb$/] - end.map do |file| - File.basename(file, '.rb').to_sym - end - end - - # Returns an array of all Plugins. - # - # Note: This loads all plugins using load_all. - def all_plugins - load_all - plugin_hash.values.grep(Class) - end - - # Loads the map file (see map). - # - # This is done automatically when plugin_path is called. - def load_plugin_map - mapfile = path_to '_map' - if File.exist? mapfile - require mapfile - true - else - false - end - end - - protected - - # Return a plugin hash that automatically loads plugins. - def make_plugin_hash - Hash.new do |h, plugin_id| - id = validate_id(plugin_id) - path = path_to id - begin - require path - rescue LoadError => boom - if h.has_key?(:default) - h[:default] - else - raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom] - end - else - # Plugin should have registered by now - if h.has_key? 
id - h[id] - else - raise PluginNotFound, "No #{self.name} plugin for #{id.inspect} found in #{path}." - end - end - end - end - - # Returns the expected path to the plugin file for the given id. - def path_to plugin_id - File.join plugin_path, "#{plugin_id}.rb" - end - - # Converts +id+ to a valid plugin ID String, or returns +nil+. - # - # Raises +ArgumentError+ for all other objects, or if the - # given String includes non-alphanumeric characters (\W). - def validate_id id - case id - when Symbol - id.to_s - when String - if id[/\w+/] == id - id.downcase - else - raise ArgumentError, "Invalid id given: #{id}" - end - else - raise ArgumentError, "Symbol or String expected, but #{id.class} given." - end - end - - end - - # = Plugin # # Plugins have to include this module. diff --git a/lib/coderay/helpers/plugin_host.rb b/lib/coderay/helpers/plugin_host.rb new file mode 100644 index 0000000..b0b3aef --- /dev/null +++ b/lib/coderay/helpers/plugin_host.rb @@ -0,0 +1,223 @@ +module CodeRay + + # = PluginHost + # + # A simple subclass/subfolder plugin system. + # + # Example: + # class Generators + # extend PluginHost + # plugin_path 'app/generators' + # end + # + # class Generator + # extend Plugin + # PLUGIN_HOST = Generators + # end + # + # class FancyGenerator < Generator + # register_for :fancy + # end + # + # Generators[:fancy] #-> FancyGenerator + # # or + # CodeRay.require_plugin 'Generators/fancy' + # # or + # Generators::Fancy + module PluginHost + + # Raised if Encoders::[] fails because: + # * a file could not be found + # * the requested Plugin is not registered + PluginNotFound = Class.new LoadError + HostNotFound = Class.new LoadError + + PLUGIN_HOSTS = [] + PLUGIN_HOSTS_BY_ID = {} # dummy hash + + # Loads all plugins using list and load. + def load_all + for plugin in list + load plugin + end + end + + # Returns the Plugin for +id+. 
+ # + # Example: + # yaml_plugin = MyPluginHost[:yaml] + def [] id, *args, &blk + # const = id.to_s.titleize + # const_get const + plugin = validate_id(id) + begin + plugin = plugin_hash.[](plugin, *args, &blk) + end while plugin.is_a? String + plugin + end + + alias load [] + + # Tries to +load+ the missing plugin by translating +const+ to the + # underscore form (eg. LinesOfCode becomes lines_of_code). + def const_missing const + id = const.to_s. + gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). + gsub(/([a-z\d])([A-Z])/,'\1_\2'). + downcase + load id + end + + class << self + + # Adds the module/class to the PLUGIN_HOSTS list. + def extended mod + PLUGIN_HOSTS << mod + end + + end + + # The path where the plugins can be found. + def plugin_path *args + unless args.empty? + @plugin_path = File.expand_path File.join(*args) + end + @plugin_path ||= '' + end + + # Map a plugin_id to another. + # + # Usage: Put this in a file plugin_path/_map.rb. + # + # class MyColorHost < PluginHost + # map :navy => :dark_blue, + # :maroon => :brown, + # :luna => :moon + # end + def map hash + for from, to in hash + from = validate_id from + to = validate_id to + plugin_hash[from] = to unless plugin_hash.has_key? from + end + end + + # Define the default plugin to use when no plugin is found + # for a given id, or return the default plugin. + # + # See also map. + # + # class MyColorHost < PluginHost + # map :navy => :dark_blue + # default :gray + # end + # + # MyColorHost.default # loads and returns the Gray plugin + def default id = nil + if id + id = validate_id id + raise "The default plugin can't be named \"default\"." if id == :default + plugin_hash[:default] = id + else + load :default + end + end + + # Every plugin must register itself for +id+ by calling register_for, + # which calls this method. + # + # See Plugin#register_for. + def register plugin, id + plugin_hash[validate_id(id)] = plugin + end + + # A Hash of plugin_id => Plugin pairs. 
+ def plugin_hash + @plugin_hash ||= (@plugin_hash = make_plugin_hash).tap { load_plugin_map } + end + + # Returns an array of all .rb files in the plugin path. + # + # The extension .rb is not included. + def list + Dir[path_to('*')].select do |file| + File.basename(file)[/^(?!_)\w+\.rb$/] + end.map do |file| + File.basename(file, '.rb').to_sym + end + end + + # Returns an array of all Plugins. + # + # Note: This loads all plugins using load_all. + def all_plugins + load_all + plugin_hash.values.grep(Class) + end + + # Loads the map file (see map). + # + # This is done automatically the first time plugin_hash is built. + def load_plugin_map + mapfile = path_to '_map' + if File.exist? mapfile + require mapfile + true + else + false + end + end + + protected + + # Return a plugin hash that automatically loads plugins. + def make_plugin_hash + Hash.new do |h, plugin_id| + id = validate_id(plugin_id) + path = path_to id + begin + require path + rescue LoadError => boom + if h.has_key?(:default) + h[:default] + else + raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom] + end + else + # Plugin should have registered by now + if h.has_key? id + h[id] + else + raise PluginNotFound, "No #{self.name} plugin for #{id.inspect} found in #{path}." + end + end + end + end + + # Returns the expected path to the plugin file for the given id. + def path_to plugin_id + File.join plugin_path, "#{plugin_id}.rb" + end + + # Converts +id+ to a valid plugin ID String. + # + # Raises +ArgumentError+ for all other objects, or if the + # given String includes non-alphanumeric characters (\W). + def validate_id id + case id + when Symbol + id.to_s + when String + if id[/\w+/] == id + id.downcase + else + raise ArgumentError, "Invalid id given: #{id}" + end + else + raise ArgumentError, "Symbol or String expected, but #{id.class} given." 
+ end + end + + end + +end diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb deleted file mode 100644 index b3f7e17..0000000 --- a/lib/coderay/scanner.rb +++ /dev/null @@ -1,355 +0,0 @@ -# encoding: utf-8 -require 'strscan' - -module CodeRay - - autoload :WordList, coderay_path('helpers', 'word_list') - - # = Scanners - # - # This module holds the Scanner class and its subclasses. - # For example, the Ruby scanner is named CodeRay::Scanners::Ruby - # can be found in coderay/scanners/ruby. - # - # Scanner also provides methods and constants for the register - # mechanism and the [] method that returns the Scanner class - # belonging to the given lang. - # - # See PluginHost. - module Scanners - extend PluginHost - plugin_path File.dirname(__FILE__), 'scanners' - - - # = Scanner - # - # The base class for all Scanners. - # - # It is a subclass of Ruby's great +StringScanner+, which - # makes it easy to access the scanning methods inside. - # - # It is also +Enumerable+, so you can use it like an Array of - # Tokens: - # - # require 'coderay' - # - # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" - # - # for text, kind in c_scanner - # puts text if kind == :operator - # end - # - # # prints: (*==)++; - # - # OK, this is a very simple example :) - # You can also use +map+, +any?+, +find+ and even +sort_by+, - # if you want. - class Scanner < StringScanner - - extend Plugin - plugin_host Scanners - - # Raised if a Scanner fails while scanning - ScanError = Class.new StandardError - - # The default options for all scanner classes. - # - # Define @default_options for subclasses. - DEFAULT_OPTIONS = { } - - KINDS_NOT_LOC = [:comment, :doctype, :docstring] - - attr_accessor :state - - class << self - - # Normalizes the given code into a string with UNIX newlines, in the - # scanner's internal encoding, with invalid and undefined charachters - # replaced by placeholders. Always returns a new object. 
- def normalize code - # original = code - code = code.to_s unless code.is_a? ::String - return code if code.empty? - - if code.respond_to? :encoding - code = encode_with_encoding code, self.encoding - else - code = to_unix code - end - # code = code.dup if code.eql? original - code - end - - # The typical filename suffix for this scanner's language. - def file_extension extension = lang - @file_extension ||= extension.to_s - end - - # The encoding used internally by this scanner. - def encoding name = 'UTF-8' - @encoding ||= defined?(Encoding.find) && Encoding.find(name) - end - - # The lang of this Scanner class, which is equal to its Plugin ID. - def lang - @plugin_id - end - - protected - - def encode_with_encoding code, target_encoding - if code.encoding == target_encoding - if code.valid_encoding? - return to_unix(code) - else - source_encoding = guess_encoding code - end - else - source_encoding = code.encoding - end - # print "encode_with_encoding from #{source_encoding} to #{target_encoding}" - code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace - end - - def to_unix code - code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code - end - - def guess_encoding s - #:nocov: - IO.popen("file -b --mime -", "w+") do |file| - file.write s[0, 1024] - file.close_write - begin - Encoding.find file.gets[/charset=([-\w]+)/, 1] - rescue ArgumentError - Encoding::BINARY - end - end - #:nocov: - end - - end - - # Create a new Scanner. - # - # * +code+ is the input String and is handled by the superclass - # StringScanner. - # * +options+ is a Hash with Symbols as keys. - # It is merged with the default options of the class (you can - # overwrite default options here.) - # - # Else, a Tokens object is used. - def initialize code = '', options = {} - if self.class == Scanner - raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses." 
- end - - @options = self.class::DEFAULT_OPTIONS.merge options - - super self.class.normalize(code) - - @tokens = options[:tokens] || Tokens.new - @tokens.scanner = self if @tokens.respond_to? :scanner= - - setup - end - - # Sets back the scanner. Subclasses should redefine the reset_instance - # method instead of this one. - def reset - super - reset_instance - end - - # Set a new string to be scanned. - def string= code - code = self.class.normalize(code) - super code - reset_instance - end - - # the Plugin ID for this scanner - def lang - self.class.lang - end - - # the default file extension for this scanner - def file_extension - self.class.file_extension - end - - # Scan the code and returns all tokens in a Tokens object. - def tokenize source = nil, options = {} - options = @options.merge(options) - - set_tokens_from_options options - set_string_from_source source - - begin - scan_tokens @tokens, options - rescue => e - message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state] - raise_inspect e.message, @tokens, message, 30, e.backtrace - end - - @cached_tokens = @tokens - if source.is_a? Array - @tokens.split_into_parts(*source.map { |part| part.size }) - else - @tokens - end - end - - # Cache the result of tokenize. - def tokens - @cached_tokens ||= tokenize - end - - # Traverse the tokens. - def each &block - tokens.each(&block) - end - include Enumerable - - # The current line position of the scanner, starting with 1. - # See also: #column. - # - # Beware, this is implemented inefficiently. It should be used - # for debugging only. - def line pos = self.pos - return 1 if pos <= 0 - binary_string[0...pos].count("\n") + 1 - end - - # The current column position of the scanner, starting with 1. - # See also: #line. - def column pos = self.pos - return 1 if pos <= 0 - pos - (binary_string.rindex(?\n, pos - 1) || -1) - end - - # The string in binary encoding. 
- # - # To be used with #pos, which is the index of the byte the scanner - # will scan next. - def binary_string - @binary_string ||= - if string.respond_to?(:bytesize) && string.bytesize != string.size - #:nocov: - string.dup.force_encoding('binary') - #:nocov: - else - string - end - end - - protected - - # Can be implemented by subclasses to do some initialization - # that has to be done once per instance. - # - # Use reset for initialization that has to be done once per - # scan. - def setup # :doc: - end - - def set_string_from_source source - case source - when Array - self.string = self.class.normalize(source.join) - when nil - reset - else - self.string = self.class.normalize(source) - end - end - - def set_tokens_from_options options - @tokens = options[:tokens] || @tokens || Tokens.new - @tokens.scanner = self if @tokens.respond_to? :scanner= - end - - # This is the central method, and commonly the only one a - # subclass implements. - # - # Subclasses must implement this method; it must return +tokens+ - # and must only use Tokens#<< for storing scanned tokens! - def scan_tokens tokens, options # :doc: - raise NotImplementedError, "#{self.class}#scan_tokens not implemented." - end - - # Resets the scanner. - def reset_instance - @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens] - @cached_tokens = nil - @binary_string = nil if defined? 
@binary_string - end - - SCAN_ERROR_MESSAGE = <<-MESSAGE - - -***ERROR in %s: %s (after %s tokens) - -tokens: -%s - -%s - -surrounding code: -%p ~~ %p - - -***ERROR*** - - MESSAGE - - def raise_inspect_arguments message, tokens, state, ambit - return File.basename(caller[0]), - message, - tokens_size(tokens), - tokens_last(tokens, 10).map(&:inspect).join("\n"), - scanner_state_info(state), - binary_string[pos - ambit, ambit], - binary_string[pos, ambit] - end - - SCANNER_STATE_INFO = <<-INFO -current line: %d column: %d pos: %d -matched: %p state: %p -bol?: %p, eos?: %p - INFO - - def scanner_state_info state - SCANNER_STATE_INFO % [ - line, column, pos, - matched, state || 'No state given!', - bol?, eos?, - ] - end - - # Scanner error with additional status information - def raise_inspect message, tokens, state = self.state, ambit = 30, backtrace = caller - raise ScanError, SCAN_ERROR_MESSAGE % raise_inspect_arguments(message, tokens, state, ambit), backtrace - end - - def tokens_size tokens - tokens.size if tokens.respond_to?(:size) - end - - def tokens_last tokens, n - tokens.respond_to?(:last) ? tokens.last(n) : [] - end - - # Shorthand for scan_until(/\z/). - # This method also avoids a JRuby 1.9 mode bug. - def scan_rest - rest = self.rest - terminate - rest - end - - end - - end -end diff --git a/lib/coderay/scanners.rb b/lib/coderay/scanners.rb new file mode 100644 index 0000000..f824f50 --- /dev/null +++ b/lib/coderay/scanners.rb @@ -0,0 +1,23 @@ +require 'strscan' + +module CodeRay + + autoload :WordList, coderay_path('helpers', 'word_list') + + # = Scanners + # + # This module holds the Scanner class and its subclasses. + # For example, the Ruby scanner is named CodeRay::Scanners::Ruby + # can be found in coderay/scanners/ruby. + # + # Scanner also provides methods and constants for the register + # mechanism and the [] method that returns the Scanner class + # belonging to the given lang. + # + # See PluginHost. 
+ module Scanners + extend PluginHost + plugin_path File.dirname(__FILE__), 'scanners' + end + +end diff --git a/lib/coderay/scanners/java.rb b/lib/coderay/scanners/java.rb index b282864..962154e 100644 --- a/lib/coderay/scanners/java.rb +++ b/lib/coderay/scanners/java.rb @@ -36,7 +36,7 @@ module Scanners add(BuiltinTypes::List, :predefined_type). add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception). add(DIRECTIVES, :directive) # :nodoc: - + ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: STRING_CONTENT_PATTERN = { diff --git a/lib/coderay/scanners/scanner.rb b/lib/coderay/scanners/scanner.rb new file mode 100644 index 0000000..efa710d --- /dev/null +++ b/lib/coderay/scanners/scanner.rb @@ -0,0 +1,337 @@ +# encoding: utf-8 + +module CodeRay + module Scanners + + # = Scanner + # + # The base class for all Scanners. + # + # It is a subclass of Ruby's great +StringScanner+, which + # makes it easy to access the scanning methods inside. + # + # It is also +Enumerable+, so you can use it like an Array of + # Tokens: + # + # require 'coderay' + # + # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" + # + # for text, kind in c_scanner + # puts text if kind == :operator + # end + # + # # prints: (*==)++; + # + # OK, this is a very simple example :) + # You can also use +map+, +any?+, +find+ and even +sort_by+, + # if you want. + class Scanner < StringScanner + + extend Plugin + plugin_host Scanners + + # Raised if a Scanner fails while scanning + ScanError = Class.new StandardError + + # The default options for all scanner classes. + # + # Define @default_options for subclasses. 
+ DEFAULT_OPTIONS = { } + + KINDS_NOT_LOC = [:comment, :doctype, :docstring] + + attr_accessor :state + + class << self + + # Normalizes the given code into a string with UNIX newlines, in the + # scanner's internal encoding, with invalid and undefined charachters + # replaced by placeholders. Always returns a new object. + def normalize code + # original = code + code = code.to_s unless code.is_a? ::String + return code if code.empty? + + if code.respond_to? :encoding + code = encode_with_encoding code, self.encoding + else + code = to_unix code + end + # code = code.dup if code.eql? original + code + end + + # The typical filename suffix for this scanner's language. + def file_extension extension = lang + @file_extension ||= extension.to_s + end + + # The encoding used internally by this scanner. + def encoding name = 'UTF-8' + @encoding ||= defined?(Encoding.find) && Encoding.find(name) + end + + # The lang of this Scanner class, which is equal to its Plugin ID. + def lang + @plugin_id + end + + protected + + def encode_with_encoding code, target_encoding + if code.encoding == target_encoding + if code.valid_encoding? + return to_unix(code) + else + source_encoding = guess_encoding code + end + else + source_encoding = code.encoding + end + # print "encode_with_encoding from #{source_encoding} to #{target_encoding}" + code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace + end + + def to_unix code + code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code + end + + def guess_encoding s + #:nocov: + IO.popen("file -b --mime -", "w+") do |file| + file.write s[0, 1024] + file.close_write + begin + Encoding.find file.gets[/charset=([-\w]+)/, 1] + rescue ArgumentError + Encoding::BINARY + end + end + #:nocov: + end + + end + + # Create a new Scanner. + # + # * +code+ is the input String and is handled by the superclass + # StringScanner. + # * +options+ is a Hash with Symbols as keys. 
+ # It is merged with the default options of the class (you can + # overwrite default options here.) + # + # Else, a Tokens object is used. + def initialize code = '', options = {} + if self.class == Scanner + raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses." + end + + @options = self.class::DEFAULT_OPTIONS.merge options + + super self.class.normalize(code) + + @tokens = options[:tokens] || Tokens.new + @tokens.scanner = self if @tokens.respond_to? :scanner= + + setup + end + + # Sets back the scanner. Subclasses should redefine the reset_instance + # method instead of this one. + def reset + super + reset_instance + end + + # Set a new string to be scanned. + def string= code + code = self.class.normalize(code) + super code + reset_instance + end + + # the Plugin ID for this scanner + def lang + self.class.lang + end + + # the default file extension for this scanner + def file_extension + self.class.file_extension + end + + # Scan the code and returns all tokens in a Tokens object. + def tokenize source = nil, options = {} + options = @options.merge(options) + + set_tokens_from_options options + set_string_from_source source + + begin + scan_tokens @tokens, options + rescue => e + message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state] + raise_inspect e.message, @tokens, message, 30, e.backtrace + end + + @cached_tokens = @tokens + if source.is_a? Array + @tokens.split_into_parts(*source.map { |part| part.size }) + else + @tokens + end + end + + # Cache the result of tokenize. + def tokens + @cached_tokens ||= tokenize + end + + # Traverse the tokens. + def each &block + tokens.each(&block) + end + include Enumerable + + # The current line position of the scanner, starting with 1. + # See also: #column. + # + # Beware, this is implemented inefficiently. It should be used + # for debugging only. 
+ def line pos = self.pos + return 1 if pos <= 0 + binary_string[0...pos].count("\n") + 1 + end + + # The current column position of the scanner, starting with 1. + # See also: #line. + def column pos = self.pos + return 1 if pos <= 0 + pos - (binary_string.rindex(?\n, pos - 1) || -1) + end + + # The string in binary encoding. + # + # To be used with #pos, which is the index of the byte the scanner + # will scan next. + def binary_string + @binary_string ||= + if string.respond_to?(:bytesize) && string.bytesize != string.size + #:nocov: + string.dup.force_encoding('binary') + #:nocov: + else + string + end + end + + protected + + # Can be implemented by subclasses to do some initialization + # that has to be done once per instance. + # + # Use reset for initialization that has to be done once per + # scan. + def setup # :doc: + end + + def set_string_from_source source + case source + when Array + self.string = self.class.normalize(source.join) + when nil + reset + else + self.string = self.class.normalize(source) + end + end + + def set_tokens_from_options options + @tokens = options[:tokens] || @tokens || Tokens.new + @tokens.scanner = self if @tokens.respond_to? :scanner= + end + + # This is the central method, and commonly the only one a + # subclass implements. + # + # Subclasses must implement this method; it must return +tokens+ + # and must only use Tokens#<< for storing scanned tokens! + def scan_tokens tokens, options # :doc: + raise NotImplementedError, "#{self.class}#scan_tokens not implemented." + end + + # Resets the scanner. + def reset_instance + @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens] + @cached_tokens = nil + @binary_string = nil if defined? 
@binary_string + end + + SCAN_ERROR_MESSAGE = <<-MESSAGE + + +***ERROR in %s: %s (after %s tokens) + +tokens: +%s + +%s + +surrounding code: +%p ~~ %p + + +***ERROR*** + + MESSAGE + + def raise_inspect_arguments message, tokens, state, ambit + return File.basename(caller[0]), + message, + tokens_size(tokens), + tokens_last(tokens, 10).map(&:inspect).join("\n"), + scanner_state_info(state), + binary_string[pos - ambit, ambit], + binary_string[pos, ambit] + end + + SCANNER_STATE_INFO = <<-INFO +current line: %d column: %d pos: %d +matched: %p state: %p +bol?: %p, eos?: %p + INFO + + def scanner_state_info state + SCANNER_STATE_INFO % [ + line, column, pos, + matched, state || 'No state given!', + bol?, eos?, + ] + end + + # Scanner error with additional status information + def raise_inspect message, tokens, state = self.state, ambit = 30, backtrace = caller + raise ScanError, SCAN_ERROR_MESSAGE % raise_inspect_arguments(message, tokens, state, ambit), backtrace + end + + def tokens_size tokens + tokens.size if tokens.respond_to?(:size) + end + + def tokens_last tokens, n + tokens.respond_to?(:last) ? tokens.last(n) : [] + end + + # Shorthand for scan_until(/\z/). + # This method also avoids a JRuby 1.9 mode bug. + def scan_rest + rest = self.rest + terminate + rest + end + + end + + end +end diff --git a/lib/coderay/style.rb b/lib/coderay/style.rb deleted file mode 100644 index df4704f..0000000 --- a/lib/coderay/style.rb +++ /dev/null @@ -1,23 +0,0 @@ -module CodeRay - - # This module holds the Style class and its subclasses. - # - # See Plugin. - module Styles - extend PluginHost - plugin_path File.dirname(__FILE__), 'styles' - - # Base class for styles. - # - # Styles are used by Encoders::HTML to colorize tokens. 
- class Style - extend Plugin - plugin_host Styles - - DEFAULT_OPTIONS = { } # :nodoc: - - end - - end - -end diff --git a/lib/coderay/styles.rb b/lib/coderay/styles.rb new file mode 100644 index 0000000..a7c43e4 --- /dev/null +++ b/lib/coderay/styles.rb @@ -0,0 +1,11 @@ +module CodeRay + + # This module holds the Style class and its subclasses. + # + # See Plugin. + module Styles + extend PluginHost + plugin_path File.dirname(__FILE__), 'styles' + end + +end diff --git a/lib/coderay/styles/style.rb b/lib/coderay/styles/style.rb new file mode 100644 index 0000000..a335386 --- /dev/null +++ b/lib/coderay/styles/style.rb @@ -0,0 +1,18 @@ +module CodeRay + + module Styles + + # Base class for styles. + # + # Styles are used by Encoders::HTML to colorize tokens. + class Style + extend Plugin + plugin_host Styles + + DEFAULT_OPTIONS = { } # :nodoc: + + end + + end + +end diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index e7bffce..aeb3b79 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -67,7 +67,7 @@ module CodeRay def method_missing meth, options = {} encode meth, options rescue PluginHost::PluginNotFound - super + raise end # Split the tokens into parts of the given +sizes+. -- cgit v1.2.1