diff options
Diffstat (limited to 'lib/coderay')
-rw-r--r-- | lib/coderay/duo.rb | 58 | ||||
-rw-r--r-- | lib/coderay/encoder.rb | 346 | ||||
-rw-r--r-- | lib/coderay/encoders/_map.rb | 16 | ||||
-rw-r--r-- | lib/coderay/encoders/count.rb | 42 | ||||
-rw-r--r-- | lib/coderay/encoders/debug.rb | 92 | ||||
-rw-r--r-- | lib/coderay/encoders/div.rb | 40 | ||||
-rw-r--r-- | lib/coderay/encoders/html/classes.rb | 146 | ||||
-rw-r--r-- | lib/coderay/encoders/html/css.rb | 130 | ||||
-rw-r--r-- | lib/coderay/encoders/html/numerization.rb | 244 | ||||
-rw-r--r-- | lib/coderay/encoders/html/output.rb | 390 | ||||
-rw-r--r-- | lib/coderay/encoders/null.rb | 52 | ||||
-rw-r--r-- | lib/coderay/encoders/page.rb | 42 | ||||
-rw-r--r-- | lib/coderay/encoders/span.rb | 40 | ||||
-rw-r--r-- | lib/coderay/encoders/statistic.rb | 162 | ||||
-rw-r--r-- | lib/coderay/encoders/text.rb | 66 | ||||
-rw-r--r-- | lib/coderay/encoders/tokens.rb | 88 | ||||
-rw-r--r-- | lib/coderay/encoders/xml.rb | 142 | ||||
-rw-r--r-- | lib/coderay/encoders/yaml.rb | 44 | ||||
-rw-r--r-- | lib/coderay/scanners/plaintext.rb | 30 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 432 | ||||
-rw-r--r-- | lib/coderay/scanners/xml.rb | 36 | ||||
-rw-r--r-- | lib/coderay/style.rb | 40 | ||||
-rw-r--r-- | lib/coderay/tokens.rb | 644 |
23 files changed, 1661 insertions, 1661 deletions
diff --git a/lib/coderay/duo.rb b/lib/coderay/duo.rb index 3125568..0e5956e 100644 --- a/lib/coderay/duo.rb +++ b/lib/coderay/duo.rb @@ -1,29 +1,29 @@ -module CodeRay
-
- # = Duo
- #
- # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
- #
- # TODO: Doc.
- class Duo
-
- attr_accessor :scanner, :encoder
-
- def initialize lang, format, options = {}
- @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options)
- @encoder = CodeRay.encoder format, options
- end
-
- class << self
- alias [] new
- end
-
- def encode code
- @scanner.string = code
- @encoder.encode_tokens(scanner.tokenize)
- end
- alias highlight encode
-
- end
-
-end
+module CodeRay + + # = Duo + # + # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $ + # + # TODO: Doc. + class Duo + + attr_accessor :scanner, :encoder + + def initialize lang, format, options = {} + @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options) + @encoder = CodeRay.encoder format, options + end + + class << self + alias [] new + end + + def encode code + @scanner.string = code + @encoder.encode_tokens(scanner.tokenize) + end + alias highlight encode + + end + +end diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb index 1065a9c..221cd58 100644 --- a/lib/coderay/encoder.rb +++ b/lib/coderay/encoder.rb @@ -1,173 +1,173 @@ -module CodeRay
-
- # This module holds the Encoder class and its subclasses.
- # For example, the HTML encoder is named CodeRay::Encoders::HTML
- # can be found in coderay/encoders/html.
- #
- # Encoders also provides methods and constants for the register
- # mechanism and the [] method that returns the Encoder class
- # belonging to the given format.
- module Encoders
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'encoders'
-
- # = Encoder
- #
- # The Encoder base class. Together with Scanner and
- # Tokens, it forms the highlighting triad.
- #
- # Encoder instances take a Tokens object and do something with it.
- #
- # The most common Encoder is surely the HTML encoder
- # (CodeRay::Encoders::HTML). It highlights the code in a colorful
- # html page.
- # If you want the highlighted code in a div or a span instead,
- # use its subclasses Div and Span.
- class Encoder
- extend Plugin
- plugin_host Encoders
-
- attr_reader :token_stream
-
- class << self
-
- # Returns if the Encoder can be used in streaming mode.
- def streamable?
- is_a? Streamable
- end
-
- # If FILE_EXTENSION isn't defined, this method returns the
- # downcase class name instead.
- def const_missing sym
- if sym == :FILE_EXTENSION
- sym.to_s.downcase
- else
- super
- end
- end
-
- end
-
- # Subclasses are to store their default options in this constant.
- DEFAULT_OPTIONS = { :stream => false }
-
- # The options you gave the Encoder at creating.
- attr_accessor :options
-
- # Creates a new Encoder.
- # +options+ is saved and used for all encode operations, as long
- # as you don't overwrite it there by passing additional options.
- #
- # Encoder objects provide three encode methods:
- # - encode simply takes a +code+ string and a +lang+
- # - encode_tokens expects a +tokens+ object instead
- # - encode_stream is like encode, but uses streaming mode.
- #
- # Each method has an optional +options+ parameter. These are
- # added to the options you passed at creation.
- def initialize options = {}
- @options = self.class::DEFAULT_OPTIONS.merge options
- raise "I am only the basic Encoder class. I can't encode "\
- "anything. :( Use my subclasses." if self.class == Encoder
- end
-
- # Encode a Tokens object.
- def encode_tokens tokens, options = {}
- options = @options.merge options
- setup options
- compile tokens, options
- finish options
- end
-
- # Encode the given +code+ after tokenizing it using the Scanner
- # for +lang+.
- def encode code, lang, options = {}
- options = @options.merge options
- scanner_options = CodeRay.get_scanner_options(options)
- tokens = CodeRay.scan code, lang, scanner_options
- encode_tokens tokens, options
- end
-
- # You can use highlight instead of encode, if that seems
- # more clear to you.
- alias highlight encode
-
- # Encode the given +code+ using the Scanner for +lang+ in
- # streaming mode.
- def encode_stream code, lang, options = {}
- raise NotStreamableError, self unless kind_of? Streamable
- options = @options.merge options
- setup options
- scanner_options = CodeRay.get_scanner_options options
- @token_stream =
- CodeRay.scan_stream code, lang, scanner_options, &self
- finish options
- end
-
- # Behave like a proc. The token method is converted to a proc.
- def to_proc
- method(:token).to_proc
- end
-
- # Return the default file extension for outputs of this encoder.
- def file_extension
- self.class::FILE_EXTENSION
- end
-
- protected
-
- # Called with merged options before encoding starts.
- # Sets @out to an empty string.
- #
- # See the HTML Encoder for an example of option caching.
- def setup options
- @out = ''
- end
-
- # Called with +text+ and +kind+ of the currently scanned token.
- # For simple scanners, it's enougth to implement this method.
- #
- # By default, it calls text_token or block_token, depending on
- # whether +text+ is a String.
- def token text, kind
- if text.is_a? ::String
- text_token text, kind
- elsif text.is_a? ::Symbol
- block_token text, kind
- else
- raise 'Unknown token text type: %p' % text
- end
- end
-
- def text_token text, kind
- end
-
- def block_token action, kind
- case action
- when :open
- open_token kind
- when :close
- close_token kind
- else
- raise 'unknown block action: %p' % action
- end
- end
-
- # Called with merged options after encoding starts.
- # The return value is the result of encoding, typically @out.
- def finish options
- @out
- end
-
- # Do the encoding.
- #
- # The already created +tokens+ object must be used; it can be a
- # TokenStream or a Tokens object.
- def compile tokens, options
- tokens.each(&self)
- end
-
- end
-
- end
-end
+module CodeRay + + # This module holds the Encoder class and its subclasses. + # For example, the HTML encoder is named CodeRay::Encoders::HTML + # can be found in coderay/encoders/html. + # + # Encoders also provides methods and constants for the register + # mechanism and the [] method that returns the Encoder class + # belonging to the given format. + module Encoders + extend PluginHost + plugin_path File.dirname(__FILE__), 'encoders' + + # = Encoder + # + # The Encoder base class. Together with Scanner and + # Tokens, it forms the highlighting triad. + # + # Encoder instances take a Tokens object and do something with it. + # + # The most common Encoder is surely the HTML encoder + # (CodeRay::Encoders::HTML). It highlights the code in a colorful + # html page. + # If you want the highlighted code in a div or a span instead, + # use its subclasses Div and Span. + class Encoder + extend Plugin + plugin_host Encoders + + attr_reader :token_stream + + class << self + + # Returns if the Encoder can be used in streaming mode. + def streamable? + is_a? Streamable + end + + # If FILE_EXTENSION isn't defined, this method returns the + # downcase class name instead. + def const_missing sym + if sym == :FILE_EXTENSION + sym.to_s.downcase + else + super + end + end + + end + + # Subclasses are to store their default options in this constant. + DEFAULT_OPTIONS = { :stream => false } + + # The options you gave the Encoder at creating. + attr_accessor :options + + # Creates a new Encoder. + # +options+ is saved and used for all encode operations, as long + # as you don't overwrite it there by passing additional options. + # + # Encoder objects provide three encode methods: + # - encode simply takes a +code+ string and a +lang+ + # - encode_tokens expects a +tokens+ object instead + # - encode_stream is like encode, but uses streaming mode. + # + # Each method has an optional +options+ parameter. These are + # added to the options you passed at creation. + def initialize options = {} + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Encoder class. I can't encode "\ + "anything. :( Use my subclasses." if self.class == Encoder + end + + # Encode a Tokens object. + def encode_tokens tokens, options = {} + options = @options.merge options + setup options + compile tokens, options + finish options + end + + # Encode the given +code+ after tokenizing it using the Scanner + # for +lang+. + def encode code, lang, options = {} + options = @options.merge options + scanner_options = CodeRay.get_scanner_options(options) + tokens = CodeRay.scan code, lang, scanner_options + encode_tokens tokens, options + end + + # You can use highlight instead of encode, if that seems + # more clear to you. + alias highlight encode + + # Encode the given +code+ using the Scanner for +lang+ in + # streaming mode. + def encode_stream code, lang, options = {} + raise NotStreamableError, self unless kind_of? Streamable + options = @options.merge options + setup options + scanner_options = CodeRay.get_scanner_options options + @token_stream = + CodeRay.scan_stream code, lang, scanner_options, &self + finish options + end + + # Behave like a proc. The token method is converted to a proc. + def to_proc + method(:token).to_proc + end + + # Return the default file extension for outputs of this encoder. + def file_extension + self.class::FILE_EXTENSION + end + + protected + + # Called with merged options before encoding starts. + # Sets @out to an empty string. + # + # See the HTML Encoder for an example of option caching. + def setup options + @out = '' + end + + # Called with +text+ and +kind+ of the currently scanned token. + # For simple scanners, it's enougth to implement this method. + # + # By default, it calls text_token or block_token, depending on + # whether +text+ is a String. + def token text, kind + if text.is_a? ::String + text_token text, kind + elsif text.is_a? ::Symbol + block_token text, kind + else + raise 'Unknown token text type: %p' % text + end + end + + def text_token text, kind + end + + def block_token action, kind + case action + when :open + open_token kind + when :close + close_token kind + else + raise 'unknown block action: %p' % action + end + end + + # Called with merged options after encoding starts. + # The return value is the result of encoding, typically @out. + def finish options + @out + end + + # Do the encoding. + # + # The already created +tokens+ object must be used; it can be a + # TokenStream or a Tokens object. + def compile tokens, options + tokens.each(&self) + end + + end + + end +end diff --git a/lib/coderay/encoders/_map.rb b/lib/coderay/encoders/_map.rb index a22a951..fdd8ae4 100644 --- a/lib/coderay/encoders/_map.rb +++ b/lib/coderay/encoders/_map.rb @@ -1,8 +1,8 @@ -module CodeRay
-module Encoders
-
- map :stats => :statistic,
- :plain => :text
-
-end
-end
+module CodeRay +module Encoders + + map :stats => :statistic, + :plain => :text + +end +end diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb index 6885541..c9a6dfd 100644 --- a/lib/coderay/encoders/count.rb +++ b/lib/coderay/encoders/count.rb @@ -1,21 +1,21 @@ -module CodeRay
-module Encoders
-
- class Count < Encoder
-
- include Streamable
- register_for :count
-
- protected
-
- def setup options
- @out = 0
- end
-
- def token text, kind
- @out += 1
- end
- end
-
-end
-end
+module CodeRay +module Encoders + + class Count < Encoder + + include Streamable + register_for :count + + protected + + def setup options + @out = 0 + end + + def token text, kind + @out += 1 + end + end + +end +end diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb index 2639e1f..eb9eaa4 100644 --- a/lib/coderay/encoders/debug.rb +++ b/lib/coderay/encoders/debug.rb @@ -1,46 +1,46 @@ -module CodeRay
-module Encoders
-
- # = Debug Encoder
- #
- # Fast encoder producing simple debug output.
- #
- # It is readable and diff-able and is used for testing.
- #
- # You cannot fully restore the tokens information from the
- # output, because consecutive :space tokens are merged.
- # Use Tokens#dump for caching purposes.
- class Debug < Encoder
-
- include Streamable
- register_for :debug
-
- FILE_EXTENSION = 'raydebug'
-
- protected
- def text_token text, kind
- @out <<
- if kind == :space
- text
- else
- text = text.gsub(/[)\\]/, '\\\\\0')
- "#{kind}(#{text})"
- end
- end
-
- def block_token action, kind
- @out << super
- end
-
- def open_token kind
- "#{kind}<"
- end
-
- def close_token kind
- ">"
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # = Debug Encoder + # + # Fast encoder producing simple debug output. + # + # It is readable and diff-able and is used for testing. + # + # You cannot fully restore the tokens information from the + # output, because consecutive :space tokens are merged. + # Use Tokens#dump for caching purposes. + class Debug < Encoder + + include Streamable + register_for :debug + + FILE_EXTENSION = 'raydebug' + + protected + def text_token text, kind + @out << + if kind == :space + text + else + text = text.gsub(/[)\\]/, '\\\\\0') + "#{kind}(#{text})" + end + end + + def block_token action, kind + @out << super + end + + def open_token kind + "#{kind}<" + end + + def close_token kind + ">" + end + + end + +end +end diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb index ce558f2..3d55415 100644 --- a/lib/coderay/encoders/div.rb +++ b/lib/coderay/encoders/div.rb @@ -1,20 +1,20 @@ -module CodeRay
-module Encoders
-
- load :html
-
- class Div < HTML
-
- FILE_EXTENSION = 'div.html'
-
- register_for :div
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :style,
- :wrap => :div,
- })
-
- end
-
-end
-end
+module CodeRay +module Encoders + + load :html + + class Div < HTML + + FILE_EXTENSION = 'div.html' + + register_for :div + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :div, + }) + + end + +end +end diff --git a/lib/coderay/encoders/html/classes.rb b/lib/coderay/encoders/html/classes.rb index 8493fa0..ea15ca0 100644 --- a/lib/coderay/encoders/html/classes.rb +++ b/lib/coderay/encoders/html/classes.rb @@ -1,73 +1,73 @@ -module CodeRay
-module Encoders
-
- class HTML
-
- ClassOfKind = {
- :attribute_name => 'an',
- :attribute_name_fat => 'af',
- :attribute_value => 'av',
- :attribute_value_fat => 'aw',
- :bin => 'bi',
- :char => 'ch',
- :class => 'cl',
- :class_variable => 'cv',
- :color => 'cr',
- :comment => 'c',
- :constant => 'co',
- :content => 'k',
- :definition => 'df',
- :delimiter => 'dl',
- :directive => 'di',
- :doc => 'do',
- :doc_string => 'ds',
- :entity => 'en',
- :error => 'er',
- :escape => 'e',
- :exception => 'ex',
- :float => 'fl',
- :function => 'fu',
- :global_variable => 'gv',
- :hex => 'hx',
- :include => 'ic',
- :inline => 'il',
- :instance_variable => 'iv',
- :integer => 'i',
- :interpreted => 'in',
- :label => 'la',
- :local_variable => 'lv',
- :modifier => 'mod',
- :oct => 'oc',
- :operator_name => 'on',
- :pre_constant => 'pc',
- :pre_type => 'pt',
- :predefined => 'pd',
- :preprocessor => 'pp',
- :regexp => 'rx',
- :reserved => 'r',
- :shell => 'sh',
- :string => 's',
- :symbol => 'sy',
- :tag => 'ta',
- :tag_fat => 'tf',
- :tag_special => 'ts',
- :type => 'ty',
- :variable => 'v',
- :xml_text => 'xt',
-
- :ident => :NO_HIGHLIGHT, # 'id'
- #:operator => 'op',
- :operator => :NO_HIGHLIGHT, # 'op'
- :space => :NO_HIGHLIGHT, # 'sp'
- :plain => :NO_HIGHLIGHT,
- }
- ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
- ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
- ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
- ClassOfKind[:escape] = ClassOfKind[:delimiter]
- ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
-
- end
-
-end
-end
+module CodeRay +module Encoders + + class HTML + + ClassOfKind = { + :attribute_name => 'an', + :attribute_name_fat => 'af', + :attribute_value => 'av', + :attribute_value_fat => 'aw', + :bin => 'bi', + :char => 'ch', + :class => 'cl', + :class_variable => 'cv', + :color => 'cr', + :comment => 'c', + :constant => 'co', + :content => 'k', + :definition => 'df', + :delimiter => 'dl', + :directive => 'di', + :doc => 'do', + :doc_string => 'ds', + :entity => 'en', + :error => 'er', + :escape => 'e', + :exception => 'ex', + :float => 'fl', + :function => 'fu', + :global_variable => 'gv', + :hex => 'hx', + :include => 'ic', + :inline => 'il', + :instance_variable => 'iv', + :integer => 'i', + :interpreted => 'in', + :label => 'la', + :local_variable => 'lv', + :modifier => 'mod', + :oct => 'oc', + :operator_name => 'on', + :pre_constant => 'pc', + :pre_type => 'pt', + :predefined => 'pd', + :preprocessor => 'pp', + :regexp => 'rx', + :reserved => 'r', + :shell => 'sh', + :string => 's', + :symbol => 'sy', + :tag => 'ta', + :tag_fat => 'tf', + :tag_special => 'ts', + :type => 'ty', + :variable => 'v', + :xml_text => 'xt', + + :ident => :NO_HIGHLIGHT, # 'id' + #:operator => 'op', + :operator => :NO_HIGHLIGHT, # 'op' + :space => :NO_HIGHLIGHT, # 'sp' + :plain => :NO_HIGHLIGHT, + } + ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function] + ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter] + ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter] + ClassOfKind[:escape] = ClassOfKind[:delimiter] + ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!' + + end + +end +end diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb index b76d682..d577602 100644 --- a/lib/coderay/encoders/html/css.rb +++ b/lib/coderay/encoders/html/css.rb @@ -1,65 +1,65 @@ -module CodeRay
-module Encoders
-
- class HTML
- class CSS
-
- attr :stylesheet
-
- def CSS.load_stylesheet style = nil
- CodeRay::Styles[style]
- end
-
- def initialize style = :default
- @classes = Hash.new
- style = CSS.load_stylesheet style
- @stylesheet = [
- style::CSS_MAIN_STYLES,
- style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
- ].join("\n")
- parse style::TOKEN_COLORS
- end
-
- def [] *styles
- cl = @classes[styles.first]
- return '' unless cl
- style = ''
- 1.upto(styles.size) do |offset|
- break if style = cl[styles[offset .. -1]]
- end
- raise 'Style not found: %p' % [styles] if $DEBUG and style.empty?
- return style
- end
-
- private
-
- CSS_CLASS_PATTERN = /
- ( (?: # $1 = classes
- \s* \. [-\w]+
- )+ )
- \s* \{ \s*
- ( [^\}]+ )? # $2 = style
- \s* \} \s*
- |
- ( . ) # $3 = error
- /mx
- def parse stylesheet
- stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error|
- raise "CSS parse error: '#{error.inspect}' not recognized" if error
- styles = classes.scan(/[-\w]+/)
- cl = styles.pop
- @classes[cl] ||= Hash.new
- @classes[cl][styles] = style.to_s.strip
- end
- end
-
- end
- end
-
-end
-end
-
-if $0 == __FILE__
- require 'pp'
- pp CodeRay::Encoders::HTML::CSS.new
-end
+module CodeRay +module Encoders + + class HTML + class CSS + + attr :stylesheet + + def CSS.load_stylesheet style = nil + CodeRay::Styles[style] + end + + def initialize style = :default + @classes = Hash.new + style = CSS.load_stylesheet style + @stylesheet = [ + style::CSS_MAIN_STYLES, + style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ') + ].join("\n") + parse style::TOKEN_COLORS + end + + def [] *styles + cl = @classes[styles.first] + return '' unless cl + style = '' + 1.upto(styles.size) do |offset| + break if style = cl[styles[offset .. -1]] + end + raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? + return style + end + + private + + CSS_CLASS_PATTERN = / + ( (?: # $1 = classes + \s* \. [-\w]+ + )+ ) + \s* \{ \s* + ( [^\}]+ )? # $2 = style + \s* \} \s* + | + ( . ) # $3 = error + /mx + def parse stylesheet + stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| + raise "CSS parse error: '#{error.inspect}' not recognized" if error + styles = classes.scan(/[-\w]+/) + cl = styles.pop + @classes[cl] ||= Hash.new + @classes[cl][styles] = style.to_s.strip + end + end + + end + end + +end +end + +if $0 == __FILE__ + require 'pp' + pp CodeRay::Encoders::HTML::CSS.new +end diff --git a/lib/coderay/encoders/html/numerization.rb b/lib/coderay/encoders/html/numerization.rb index 2960f87..1e4a4ed 100644 --- a/lib/coderay/encoders/html/numerization.rb +++ b/lib/coderay/encoders/html/numerization.rb @@ -1,122 +1,122 @@ -module CodeRay
-module Encoders
-
- class HTML
-
- module Output
-
- def numerize *args
- clone.numerize!(*args)
- end
-
-=begin NUMERIZABLE_WRAPPINGS = {
- :table => [:div, :page, nil],
- :inline => :all,
- :list => [:div, :page, nil]
- }
- NUMERIZABLE_WRAPPINGS.default = :all
-=end
- def numerize! mode = :table, options = {}
- return self unless mode
-
- options = DEFAULT_OPTIONS.merge options
-
- start = options[:line_number_start]
- unless start.is_a? Integer
- raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start
- end
-
- #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode]
- #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap]
- # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
- #end
-
- bold_every = options[:bold_every]
- bolding =
- if bold_every == false
- proc { |line| line.to_s }
- elsif bold_every.is_a? Integer
- raise ArgumentError, ":bolding can't be 0." if bold_every == 0
- proc do |line|
- if line % bold_every == 0
- "<strong>#{line}</strong>" # every bold_every-th number in bold
- else
- line.to_s
- end
- end
- else
- raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every
- end
-
- case mode
- when :inline
- max_width = (start + line_count).to_s.size
- line = start
- gsub!(/^/) do
- line_number = bolding.call line
- indent = ' ' * (max_width - line.to_s.size)
- res = "<span class=\"no\">#{indent}#{line_number}</span> "
- line += 1
- res
- end
-
- when :table
- # This is really ugly.
- # Because even monospace fonts seem to have different heights when bold,
- # I make the newline bold, both in the code and the line numbers.
- # FIXME Still not working perfect for Mr. Internet Exploder
- # FIXME Firefox struggles with very long codes (> 200 lines)
- line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
- line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
- line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
-
- line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
- gsub!(/\n/) { "<tt>\n</tt>" }
- wrap_in! line_numbers_table_tpl
- @wrapped_in = :div
-
- when :list
- opened_tags = []
- gsub!(/^.*$\n?/) do |line|
- line.chomp!
-
- open = opened_tags.join
- line.scan(%r!<(/)?span[^>]*>?!) do |close,|
- if close
- opened_tags.pop
- else
- opened_tags << $&
- end
- end
- close = '</span>' * opened_tags.size
-
- "<li>#{open}#{line}#{close}</li>"
- end
- wrap_in! LIST
- @wrapped_in = :div
-
- else
- raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
- [mode, [:table, :list, :inline]]
- end
-
- self
- end
-
- def line_count
- line_count = count("\n")
- position_of_last_newline = rindex(?\n)
- if position_of_last_newline
- after_last_newline = self[position_of_last_newline + 1 .. -1]
- ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/]
- line_count += 1 if not ends_with_newline
- end
- line_count
- end
-
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + class HTML + + module Output + + def numerize *args + clone.numerize!(*args) + end + +=begin NUMERIZABLE_WRAPPINGS = { + :table => [:div, :page, nil], + :inline => :all, + :list => [:div, :page, nil] + } + NUMERIZABLE_WRAPPINGS.default = :all +=end + def numerize! mode = :table, options = {} + return self unless mode + + options = DEFAULT_OPTIONS.merge options + + start = options[:line_number_start] + unless start.is_a? Integer + raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start + end + + #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode] + #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap] + # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]] + #end + + bold_every = options[:bold_every] + bolding = + if bold_every == false + proc { |line| line.to_s } + elsif bold_every.is_a? Integer + raise ArgumentError, ":bolding can't be 0." if bold_every == 0 + proc do |line| + if line % bold_every == 0 + "<strong>#{line}</strong>" # every bold_every-th number in bold + else + line.to_s + end + end + else + raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every + end + + case mode + when :inline + max_width = (start + line_count).to_s.size + line = start + gsub!(/^/) do + line_number = bolding.call line + indent = ' ' * (max_width - line.to_s.size) + res = "<span class=\"no\">#{indent}#{line_number}</span> " + line += 1 + res + end + + when :table + # This is really ugly. + # Because even monospace fonts seem to have different heights when bold, + # I make the newline bold, both in the code and the line numbers. + # FIXME Still not working perfect for Mr. Internet Exploder + # FIXME Firefox struggles with very long codes (> 200 lines) + line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") + line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ + line_numbers.gsub!(/\n/) { "<tt>\n</tt>" } + + line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) + gsub!(/\n/) { "<tt>\n</tt>" } + wrap_in! line_numbers_table_tpl + @wrapped_in = :div + + when :list + opened_tags = [] + gsub!(/^.*$\n?/) do |line| + line.chomp! + + open = opened_tags.join + line.scan(%r!<(/)?span[^>]*>?!) do |close,| + if close + opened_tags.pop + else + opened_tags << $& + end + end + close = '</span>' * opened_tags.size + + "<li>#{open}#{line}#{close}</li>" + end + wrap_in! LIST + @wrapped_in = :div + + else + raise ArgumentError, 'Unknown value %p for mode: expected one of %p' % + [mode, [:table, :list, :inline]] + end + + self + end + + def line_count + line_count = count("\n") + position_of_last_newline = rindex(?\n) + if position_of_last_newline + after_last_newline = self[position_of_last_newline + 1 .. -1] + ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/] + line_count += 1 if not ends_with_newline + end + line_count + end + + end + + end + +end +end diff --git a/lib/coderay/encoders/html/output.rb b/lib/coderay/encoders/html/output.rb index 61258ee..e74e55e 100644 --- a/lib/coderay/encoders/html/output.rb +++ b/lib/coderay/encoders/html/output.rb @@ -1,195 +1,195 @@ -module CodeRay
-module Encoders
-
- class HTML
-
- # This module is included in the output String from thew HTML Encoder.
- #
- # It provides methods like wrap, div, page etc.
- #
- # Remember to use #clone instead of #dup to keep the modules the object was
- # extended with.
- #
- # TODO: more doc.
- module Output
-
- require 'coderay/encoders/html/numerization.rb'
-
- attr_accessor :css
-
- class << self
-
- # This makes Output look like a class.
- #
- # Example:
- #
- # a = Output.new '<span class="co">Code</span>'
- # a.wrap! :page
- def new string, css = CSS.new, element = nil
- output = string.clone.extend self
- output.wrapped_in = element
- output.css = css
- output
- end
-
- # Raises an exception if an object that doesn't respond to to_str is extended by Output,
- # to prevent users from misuse. Use Module#remove_method to disable.
- def extended o
- warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
- end
-
- def make_stylesheet css, in_tag = false
- sheet = css.stylesheet
- sheet = <<-CSS if in_tag
-<style type="text/css">
-#{sheet}
-</style>
- CSS
- sheet
- end
-
- def page_template_for_css css
- sheet = make_stylesheet css
- PAGE.apply 'CSS', sheet
- end
-
- # Define a new wrapper. This is meta programming.
- def wrapper *wrappers
- wrappers.each do |wrapper|
- define_method wrapper do |*args|
- wrap wrapper, *args
- end
- define_method "#{wrapper}!".to_sym do |*args|
- wrap! wrapper, *args
- end
- end
- end
-
- end
-
- wrapper :div, :span, :page
-
- def wrapped_in? element
- wrapped_in == element
- end
-
- def wrapped_in
- @wrapped_in ||= nil
- end
- attr_writer :wrapped_in
-
- def wrap_in template
- clone.wrap_in! template
- end
-
- def wrap_in! template
- Template.wrap! self, template, 'CONTENT'
- self
- end
-
- def wrap! element, *args
- return self if not element or element == wrapped_in
- case element
- when :div
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
- wrap_in! DIV
- when :span
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
- wrap_in! SPAN
- when :page
- wrap! :div if wrapped_in? nil
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
- wrap_in! Output.page_template_for_css(@css)
- when nil
- return self
- else
- raise "Unknown value %p for :wrap" % element
- end
- @wrapped_in = element
- self
- end
-
- def wrap *args
- clone.wrap!(*args)
- end
-
- def stylesheet in_tag = false
- Output.make_stylesheet @css, in_tag
- end
-
- class Template < String
-
- def self.wrap! str, template, target
- target = Regexp.new(Regexp.escape("<%#{target}%>"))
- if template =~ target
- str[0,0] = $`
- str << $'
- else
- raise "Template target <%%%p%%> not found" % target
- end
- end
-
- def apply target, replacement
- target = Regexp.new(Regexp.escape("<%#{target}%>"))
- if self =~ target
- Template.new($` + replacement + $')
- else
- raise "Template target <%%%p%%> not found" % target
- end
- end
-
- module Simple
- def ` str #` <-- for stupid editors
- Template.new str
- end
- end
- end
-
- extend Template::Simple
-
-#-- don't include the templates in docu
-
- SPAN = `<span class="CodeRay"><%CONTENT%></span>`
-
- DIV = <<-`DIV`
-<div class="CodeRay">
- <div class="code"><pre><%CONTENT%></pre></div>
-</div>
- DIV
-
- TABLE = <<-`TABLE`
-<table class="CodeRay"><tr>
- <td class="line_numbers" title="click to toggle" onclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
- <td class="code"><pre ondblclick="with (this.style) { overflow = (overflow == 'auto' || overflow == '') ? 'visible' : 'auto' }"><%CONTENT%></pre></td>
-</tr></table>
- TABLE
- # title="double click to expand"
-
- LIST = <<-`LIST`
-<ol class="CodeRay"><%CONTENT%></ol>
- LIST
-
- PAGE = <<-`PAGE`
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
-<head>
- <meta http-equiv="content-type" content="text/html; charset=utf-8" />
- <title>CodeRay HTML Encoder Example</title>
- <style type="text/css">
-<%CSS%>
- </style>
-</head>
-<body style="background-color: white;">
-
-<%CONTENT%>
-</body>
-</html>
- PAGE
-
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + class HTML + + # This module is included in the output String from thew HTML Encoder. + # + # It provides methods like wrap, div, page etc. + # + # Remember to use #clone instead of #dup to keep the modules the object was + # extended with. + # + # TODO: more doc. + module Output + + require 'coderay/encoders/html/numerization.rb' + + attr_accessor :css + + class << self + + # This makes Output look like a class. + # + # Example: + # + # a = Output.new '<span class="co">Code</span>' + # a.wrap! :page + def new string, css = CSS.new, element = nil + output = string.clone.extend self + output.wrapped_in = element + output.css = css + output + end + + # Raises an exception if an object that doesn't respond to to_str is extended by Output, + # to prevent users from misuse. Use Module#remove_method to disable. + def extended o + warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str + end + + def make_stylesheet css, in_tag = false + sheet = css.stylesheet + sheet = <<-CSS if in_tag +<style type="text/css"> +#{sheet} +</style> + CSS + sheet + end + + def page_template_for_css css + sheet = make_stylesheet css + PAGE.apply 'CSS', sheet + end + + # Define a new wrapper. This is meta programming. + def wrapper *wrappers + wrappers.each do |wrapper| + define_method wrapper do |*args| + wrap wrapper, *args + end + define_method "#{wrapper}!".to_sym do |*args| + wrap! wrapper, *args + end + end + end + + end + + wrapper :div, :span, :page + + def wrapped_in? element + wrapped_in == element + end + + def wrapped_in + @wrapped_in ||= nil + end + attr_writer :wrapped_in + + def wrap_in template + clone.wrap_in! template + end + + def wrap_in! template + Template.wrap! self, template, 'CONTENT' + self + end + + def wrap! element, *args + return self if not element or element == wrapped_in + case element + when :div + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! DIV + when :span + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! SPAN + when :page + wrap! :div if wrapped_in? nil + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div + wrap_in! Output.page_template_for_css(@css) + when nil + return self + else + raise "Unknown value %p for :wrap" % element + end + @wrapped_in = element + self + end + + def wrap *args + clone.wrap!(*args) + end + + def stylesheet in_tag = false + Output.make_stylesheet @css, in_tag + end + + class Template < String + + def self.wrap! str, template, target + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if template =~ target + str[0,0] = $` + str << $' + else + raise "Template target <%%%p%%> not found" % target + end + end + + def apply target, replacement + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if self =~ target + Template.new($` + replacement + $') + else + raise "Template target <%%%p%%> not found" % target + end + end + + module Simple + def ` str #` <-- for stupid editors + Template.new str + end + end + end + + extend Template::Simple + +#-- don't include the templates in docu + + SPAN = `<span class="CodeRay"><%CONTENT%></span>` + + DIV = <<-`DIV` +<div class="CodeRay"> + <div class="code"><pre><%CONTENT%></pre></div> +</div> + DIV + + TABLE = <<-`TABLE` +<table class="CodeRay"><tr> + <td class="line_numbers" title="click to toggle" onclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td> + <td class="code"><pre ondblclick="with (this.style) { overflow = (overflow == 'auto' || overflow == '') ? 'visible' : 'auto' }"><%CONTENT%></pre></td> +</tr></table> + TABLE + # title="double click to expand" + + LIST = <<-`LIST` +<ol class="CodeRay"><%CONTENT%></ol> + LIST + + PAGE = <<-`PAGE` +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8" /> + <title>CodeRay HTML Encoder Example</title> + <style type="text/css"> +<%CSS%> + </style> +</head> +<body style="background-color: white;"> + +<%CONTENT%> +</body> +</html> + PAGE + + end + + end + +end +end diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb index 96d81fe..add3862 100644 --- a/lib/coderay/encoders/null.rb +++ b/lib/coderay/encoders/null.rb @@ -1,26 +1,26 @@ -module CodeRay
-module Encoders
-
- # = Null Encoder
- #
- # Does nothing and returns an empty string.
- class Null < Encoder
-
- include Streamable
- register_for :null
-
- # Defined for faster processing
- def to_proc
- proc {}
- end
-
- protected
-
- def token(*)
- # do nothing
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # = Null Encoder + # + # Does nothing and returns an empty string. + class Null < Encoder + + include Streamable + register_for :null + + # Defined for faster processing + def to_proc + proc {} + end + + protected + + def token(*) + # do nothing + end + + end + +end +end diff --git a/lib/coderay/encoders/page.rb b/lib/coderay/encoders/page.rb index 1ed7985..c08f094 100644 --- a/lib/coderay/encoders/page.rb +++ b/lib/coderay/encoders/page.rb @@ -1,21 +1,21 @@ -module CodeRay
-module Encoders
-
- load :html
-
- class Page < HTML
-
- FILE_EXTENSION = 'html'
-
- register_for :page
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :class,
- :wrap => :page,
- :line_numbers => :table
- })
-
- end
-
-end
-end
+module CodeRay +module Encoders + + load :html + + class Page < HTML + + FILE_EXTENSION = 'html' + + register_for :page + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :class, + :wrap => :page, + :line_numbers => :table + }) + + end + +end +end diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb index e892cb2..988afec 100644 --- a/lib/coderay/encoders/span.rb +++ b/lib/coderay/encoders/span.rb @@ -1,20 +1,20 @@ -module CodeRay
-module Encoders
-
- load :html
-
- class Span < HTML
-
- FILE_EXTENSION = 'span.html'
-
- register_for :span
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :style,
- :wrap => :span,
- })
-
- end
-
-end
-end
+module CodeRay +module Encoders + + load :html + + class Span < HTML + + FILE_EXTENSION = 'span.html' + + register_for :span + + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :span, + }) + + end + +end +end diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index f80d5c8..e2a0460 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -1,81 +1,81 @@ -module CodeRay
-module Encoders
-
- # Makes a statistic for the given tokens.
- class Statistic < Encoder
-
- include Streamable
- register_for :stats, :statistic
-
- attr_reader :type_stats, :real_token_count
-
- protected
-
- TypeStats = Struct.new :count, :size
-
- def setup options
- @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
- @real_token_count = 0
- end
-
- def generate tokens, options
- @tokens = tokens
- super
- end
-
- def text_token text, kind
- @real_token_count += 1 unless kind == :space
- @type_stats[kind].count += 1
- @type_stats[kind].size += text.size
- @type_stats['TOTAL'].size += text.size
- end
-
- # TODO Hierarchy handling
- def block_token action, kind
- #@content_type = kind
- @type_stats['open/close'].count += 1
- end
-
- def token text, kind
- super
- @type_stats['TOTAL'].count += 1
- end
-
- STATS = <<-STATS
-
-Code Statistics
-
-Tokens %8d
- Non-Whitespace %8d
-Bytes Total %8d
-
-Token Types (%d):
- type count ratio size (average)
--------------------------------------------------------------
-%s
- STATS
-# space 12007 33.81 % 1.7
- TOKEN_TYPES_ROW = <<-TKR
- %-20s %8d %6.2f %% %5.1f
- TKR
-
- def finish options
- all = @type_stats['TOTAL']
- all_count, all_size = all.count, all.size
- @type_stats.each do |type, stat|
- stat.size /= stat.count.to_f
- end
- types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v|
- TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
- end.join
- STATS % [
- all_count, @real_token_count, all_size,
- @type_stats.delete_if { |k, v| k.is_a? String }.size,
- types_stats
- ]
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # Makes a statistic for the given tokens. + class Statistic < Encoder + + include Streamable + register_for :stats, :statistic + + attr_reader :type_stats, :real_token_count + + protected + + TypeStats = Struct.new :count, :size + + def setup options + @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 } + @real_token_count = 0 + end + + def generate tokens, options + @tokens = tokens + super + end + + def text_token text, kind + @real_token_count += 1 unless kind == :space + @type_stats[kind].count += 1 + @type_stats[kind].size += text.size + @type_stats['TOTAL'].size += text.size + end + + # TODO Hierarchy handling + def block_token action, kind + #@content_type = kind + @type_stats['open/close'].count += 1 + end + + def token text, kind + super + @type_stats['TOTAL'].count += 1 + end + + STATS = <<-STATS + +Code Statistics + +Tokens %8d + Non-Whitespace %8d +Bytes Total %8d + +Token Types (%d): + type count ratio size (average) +------------------------------------------------------------- +%s + STATS +# space 12007 33.81 % 1.7 + TOKEN_TYPES_ROW = <<-TKR + %-20s %8d %6.2f %% %5.1f + TKR + + def finish options + all = @type_stats['TOTAL'] + all_count, all_size = all.count, all.size + @type_stats.each do |type, stat| + stat.size /= stat.count.to_f + end + types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v| + TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size] + end.join + STATS % [ + all_count, @real_token_count, all_size, + @type_stats.delete_if { |k, v| k.is_a? String }.size, + types_stats + ] + end + + end + +end +end diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb index 31661ef..17256c6 100644 --- a/lib/coderay/encoders/text.rb +++ b/lib/coderay/encoders/text.rb @@ -1,33 +1,33 @@ -module CodeRay
-module Encoders
-
- class Text < Encoder
-
- include Streamable
- register_for :text
-
- FILE_EXTENSION = 'txt'
-
- DEFAULT_OPTIONS = {
- :separator => ''
- }
-
- protected
- def setup options
- super
- @sep = options[:separator]
- end
-
- def token text, kind
- return unless text.respond_to? :to_str
- @out << text + @sep
- end
-
- def finish options
- @out.chomp @sep
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + class Text < Encoder + + include Streamable + register_for :text + + FILE_EXTENSION = 'txt' + + DEFAULT_OPTIONS = { + :separator => '' + } + + protected + def setup options + super + @sep = options[:separator] + end + + def token text, kind + return unless text.respond_to? :to_str + @out << text + @sep + end + + def finish options + @out.chomp @sep + end + + end + +end +end diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb index 743cc0e..2428589 100644 --- a/lib/coderay/encoders/tokens.rb +++ b/lib/coderay/encoders/tokens.rb @@ -1,44 +1,44 @@ -module CodeRay
-module Encoders
-
- # The Tokens encoder converts the tokens to a simple
- # readable format. It doesn't use colors and is mainly
- # intended for console output.
- #
- # The tokens are converted with Tokens.write_token.
- #
- # The format is:
- #
- # <token-kind> \t <escaped token-text> \n
- #
- # Example:
- #
- # require 'coderay'
- # puts CodeRay.scan("puts 3 + 4", :ruby).tokens
- #
- # prints:
- #
- # ident puts
- # space
- # integer 3
- # space
- # operator +
- # space
- # integer 4
- #
- class Tokens < Encoder
-
- include Streamable
- register_for :tokens
-
- FILE_EXTENSION = 'tok'
-
- protected
- def token *args
- @out << CodeRay::Tokens.write_token(*args)
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # The Tokens encoder converts the tokens to a simple + # readable format. It doesn't use colors and is mainly + # intended for console output. + # + # The tokens are converted with Tokens.write_token. + # + # The format is: + # + # <token-kind> \t <escaped token-text> \n + # + # Example: + # + # require 'coderay' + # puts CodeRay.scan("puts 3 + 4", :ruby).tokens + # + # prints: + # + # ident puts + # space + # integer 3 + # space + # operator + + # space + # integer 4 + # + class Tokens < Encoder + + include Streamable + register_for :tokens + + FILE_EXTENSION = 'tok' + + protected + def token *args + @out << CodeRay::Tokens.write_token(*args) + end + + end + +end +end diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb index 21ef0cf..09e4549 100644 --- a/lib/coderay/encoders/xml.rb +++ b/lib/coderay/encoders/xml.rb @@ -1,71 +1,71 @@ -module CodeRay
-module Encoders
-
- # = XML Encoder
- #
- # Uses REXML. Very slow.
- class XML < Encoder
-
- include Streamable
- register_for :xml
-
- FILE_EXTENSION = 'xml'
-
- require 'rexml/document'
-
- DEFAULT_OPTIONS = {
- :tab_width => 8,
- :pretty => -1,
- :transitive => false,
- }
-
- protected
-
- def setup options
- @out = ''
- @doc = REXML::Document.new
- @doc << REXML::XMLDecl.new
- @tab_width = options[:tab_width]
- @root = @node = @doc.add_element('coderay-tokens')
- end
-
- def finish options
- @doc.write @out, options[:pretty], options[:transitive], true
- @out
- end
-
- def text_token text, kind
- if kind == :space
- token = @node
- else
- token = @node.add_element kind.to_s
- end
- text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl|
- case
- when space
- token << REXML::Text.new(space, true)
- when tab
- token << REXML::Text.new(tab, true)
- when nl
- token << REXML::Text.new(nl, true)
- else
- token << REXML::Text.new($&)
- end
- end
- end
-
- def open_token kind
- @node = @node.add_element kind.to_s
- end
-
- def close_token kind
- if @node == @root
- raise 'no token to close!'
- end
- @node = @node.parent
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # = XML Encoder + # + # Uses REXML. Very slow. + class XML < Encoder + + include Streamable + register_for :xml + + FILE_EXTENSION = 'xml' + + require 'rexml/document' + + DEFAULT_OPTIONS = { + :tab_width => 8, + :pretty => -1, + :transitive => false, + } + + protected + + def setup options + @out = '' + @doc = REXML::Document.new + @doc << REXML::XMLDecl.new + @tab_width = options[:tab_width] + @root = @node = @doc.add_element('coderay-tokens') + end + + def finish options + @doc.write @out, options[:pretty], options[:transitive], true + @out + end + + def text_token text, kind + if kind == :space + token = @node + else + token = @node.add_element kind.to_s + end + text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| + case + when space + token << REXML::Text.new(space, true) + when tab + token << REXML::Text.new(tab, true) + when nl + token << REXML::Text.new(nl, true) + else + token << REXML::Text.new($&) + end + end + end + + def open_token kind + @node = @node.add_element kind.to_s + end + + def close_token kind + if @node == @root + raise 'no token to close!' + end + @node = @node.parent + end + + end + +end +end diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb index 47f64a4..5564e58 100644 --- a/lib/coderay/encoders/yaml.rb +++ b/lib/coderay/encoders/yaml.rb @@ -1,22 +1,22 @@ -module CodeRay
-module Encoders
-
- # = YAML Encoder
- #
- # Slow.
- class YAML < Encoder
-
- register_for :yaml
-
- FILE_EXTENSION = 'yaml'
-
- protected
- def compile tokens, options
- require 'yaml'
- @out = tokens.to_a.to_yaml
- end
-
- end
-
-end
-end
+module CodeRay +module Encoders + + # = YAML Encoder + # + # Slow. + class YAML < Encoder + + register_for :yaml + + FILE_EXTENSION = 'yaml' + + protected + def compile tokens, options + require 'yaml' + @out = tokens.to_a.to_yaml + end + + end + +end +end diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 9007646..432745f 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ -module CodeRay
-module Scanners
-
- class Plaintext < Scanner
-
- register_for :plaintext, :plain
-
- def scan_tokens tokens, options
- tokens << [scan_until(/\z/), :plain]
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + class Plaintext < Scanner + + register_for :plaintext, :plain + + def scan_tokens tokens, options + tokens << [scan_until(/\z/), :plain] + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index b1e0d1b..c601011 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -1,216 +1,216 @@ -module CodeRay
-module Scanners
-
- module Ruby::Patterns # :nodoc:
-
- RESERVED_WORDS = %w[
- and def end in or unless begin
- defined? ensure module redo super until
- BEGIN break do next rescue then
- when END case else for retry
- while alias class elsif if not return
- undef yield
- ]
-
- DEF_KEYWORDS = %w[ def ]
- UNDEF_KEYWORDS = %w[ undef ]
- MODULE_KEYWORDS = %w[class module]
- DEF_NEW_STATE = WordList.new(:initial).
- add(DEF_KEYWORDS, :def_expected).
- add(UNDEF_KEYWORDS, :undef_expected).
- add(MODULE_KEYWORDS, :module_expected)
-
- IDENTS_ALLOWING_REGEXP = %w[
- and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
- ]
- REGEXP_ALLOWED = WordList.new(false).
- add(IDENTS_ALLOWING_REGEXP, :set)
-
- PREDEFINED_CONSTANTS = %w[
- nil true false self
- DATA ARGV ARGF __FILE__ __LINE__
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- IDENT = /[a-z_][\w_]*/i
-
- METHOD_NAME = / #{IDENT} [?!]? /ox
- METHOD_NAME_OPERATOR = /
- \*\*? # multiplication and power
- | [-+]@? # plus, minus
- | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
- | \[\]=? # array getter and setter
- | << | >> # append or shift left, shift right
- | <=?>? | >=? # comparison, rocket operator
- | ===? # simple equality and case equality
- /ox
- METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
- INSTANCE_VARIABLE = / @ #{IDENT} /ox
- CLASS_VARIABLE = / @@ #{IDENT} /ox
- OBJECT_VARIABLE = / @@? #{IDENT} /ox
- GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
- PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
- VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
-
- QUOTE_TO_TYPE = {
- '`' => :shell,
- '/'=> :regexp,
- }
- QUOTE_TO_TYPE.default = :string
-
- REGEXP_MODIFIERS = /[mixounse]*/
- REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
-
- DECIMAL = /\d+(?:_\d+)*/
- OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
- HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
- BINARY = /0b[01]+(?:_[01]+)*/
-
- EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
- FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
- FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
- NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
-
- SYMBOL = /
- :
- (?:
- #{METHOD_NAME_EX}
- | #{PREFIX_VARIABLE}
- | ['"]
- )
- /ox
-
- # TODO investigste \M, \c and \C escape sequences
- # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
- # assert_equal(225, ?\M-a)
- # assert_equal(129, ?\M-\C-a)
- ESCAPE = /
- [abefnrstv]
- | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
- | [0-7]{1,3}
- | x[0-9A-Fa-f]{1,2}
- | .
- /mx
-
- CHARACTER = /
- \?
- (?:
- [^\s\\]
- | \\ #{ESCAPE}
- )
- /mx
-
- # NOTE: This is not completely correct, but
- # nobody needs heredoc delimiters ending with \n.
- HEREDOC_OPEN = /
- << (-)? # $1 = float
- (?:
- ( [A-Za-z_0-9]+ ) # $2 = delim
- |
- ( ["'`] ) # $3 = quote, type
- ( [^\n]*? ) \3 # $4 = delim
- )
- /mx
-
- RUBYDOC = /
- =begin (?!\S)
- .*?
- (?: \Z | ^=end (?!\S) [^\n]* )
- /mx
-
- DATA = /
- __END__$
- .*?
- (?: \Z | (?=^\#CODE) )
- /mx
-
- RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
-
- RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
-
- # FIXME: \s and = are only a workaround, they are still allowed
- # as delimiters.
- FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
- FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
-
- FancyStringType = {
- 'q' => [:string, false],
- 'Q' => [:string, true],
- 'r' => [:regexp, true],
- 's' => [:symbol, false],
- 'x' => [:shell, true]
- }
- FancyStringType['w'] = FancyStringType['q']
- FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
-
- class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
- :paren, :paren_depth, :pattern, :next_state
-
- CLOSING_PAREN = Hash[ *%w[
- ( )
- [ ]
- < >
- { }
- ] ]
-
- CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
- OPENING_PAREN = CLOSING_PAREN.invert
-
- STRING_PATTERN = Hash.new { |h, k|
- delim, interpreted = *k
- delim_pattern = Regexp.escape(delim.dup)
- if closing_paren = CLOSING_PAREN[delim]
- delim_pattern << Regexp.escape(closing_paren)
- end
-
-
- special_escapes =
- case interpreted
- when :regexp_symbols
- '| ' + REGEXP_SYMBOLS.source
- when :words
- '| \s'
- end
-
- h[k] =
- if interpreted and not delim == '#'
- / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
- else
- / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
- end
- }
-
- HEREDOC_PATTERN = Hash.new { |h, k|
- delim, interpreted, indented = *k
- delim_pattern = Regexp.escape(delim.dup)
- delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
- h[k] =
- if interpreted
- / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
- else
- / (?= #{delim_pattern}() | \\ ) /mx
- end
- }
-
- def initialize kind, interpreted, delim, heredoc = false
- if heredoc
- pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
- delim = nil
- else
- pattern = STRING_PATTERN[ [delim, interpreted] ]
- if paren = CLOSING_PAREN[delim]
- delim, paren = paren, delim
- paren_depth = 1
- end
- end
- super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
- end
- end unless defined? StringState
-
- end
-
-end
-end
+module CodeRay +module Scanners + + module Ruby::Patterns # :nodoc: + + RESERVED_WORDS = %w[ + and def end in or unless begin + defined? ensure module redo super until + BEGIN break do next rescue then + when END case else for retry + while alias class elsif if not return + undef yield + ] + + DEF_KEYWORDS = %w[ def ] + UNDEF_KEYWORDS = %w[ undef ] + MODULE_KEYWORDS = %w[class module] + DEF_NEW_STATE = WordList.new(:initial). + add(DEF_KEYWORDS, :def_expected). + add(UNDEF_KEYWORDS, :undef_expected). + add(MODULE_KEYWORDS, :module_expected) + + IDENTS_ALLOWING_REGEXP = %w[ + and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split + ] + REGEXP_ALLOWED = WordList.new(false). + add(IDENTS_ALLOWING_REGEXP, :set) + + PREDEFINED_CONSTANTS = %w[ + nil true false self + DATA ARGV ARGF __FILE__ __LINE__ + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :pre_constant) + + IDENT = /[a-z_][\w_]*/i + + METHOD_NAME = / #{IDENT} [?!]? /ox + METHOD_NAME_OPERATOR = / + \*\*? # multiplication and power + | [-+]@? # plus, minus + | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde + | \[\]=? # array getter and setter + | << | >> # append or shift left, shift right + | <=?>? | >=? # comparison, rocket operator + | ===? # simple equality and case equality + /ox + METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox + INSTANCE_VARIABLE = / @ #{IDENT} /ox + CLASS_VARIABLE = / @@ #{IDENT} /ox + OBJECT_VARIABLE = / @@? #{IDENT} /ox + GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox + PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox + VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox + + QUOTE_TO_TYPE = { + '`' => :shell, + '/'=> :regexp, + } + QUOTE_TO_TYPE.default = :string + + REGEXP_MODIFIERS = /[mixounse]*/ + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ + + DECIMAL = /\d+(?:_\d+)*/ + OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ + HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ + BINARY = /0b[01]+(?:_[01]+)*/ + + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox + + SYMBOL = / + : + (?: + #{METHOD_NAME_EX} + | #{PREFIX_VARIABLE} + | ['"] + ) + /ox + + # TODO investigste \M, \c and \C escape sequences + # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) + # assert_equal(225, ?\M-a) + # assert_equal(129, ?\M-\C-a) + ESCAPE = / + [abefnrstv] + | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- + | [0-7]{1,3} + | x[0-9A-Fa-f]{1,2} + | . + /mx + + CHARACTER = / + \? + (?: + [^\s\\] + | \\ #{ESCAPE} + ) + /mx + + # NOTE: This is not completely correct, but + # nobody needs heredoc delimiters ending with \n. + HEREDOC_OPEN = / + << (-)? # $1 = float + (?: + ( [A-Za-z_0-9]+ ) # $2 = delim + | + ( ["'`] ) # $3 = quote, type + ( [^\n]*? ) \3 # $4 = delim + ) + /mx + + RUBYDOC = / + =begin (?!\S) + .*? + (?: \Z | ^=end (?!\S) [^\n]* ) + /mx + + DATA = / + __END__$ + .*? + (?: \Z | (?=^\#CODE) ) + /mx + + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x + + # FIXME: \s and = are only a workaround, they are still allowed + # as delimiters. + FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx + FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx + + FancyStringType = { + 'q' => [:string, false], + 'Q' => [:string, true], + 'r' => [:regexp, true], + 's' => [:symbol, false], + 'x' => [:shell, true] + } + FancyStringType['w'] = FancyStringType['q'] + FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] + + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, + :paren, :paren_depth, :pattern, :next_state + + CLOSING_PAREN = Hash[ *%w[ + ( ) + [ ] + < > + { } + ] ] + + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << + OPENING_PAREN = CLOSING_PAREN.invert + + STRING_PATTERN = Hash.new { |h, k| + delim, interpreted = *k + delim_pattern = Regexp.escape(delim.dup) + if closing_paren = CLOSING_PAREN[delim] + delim_pattern << Regexp.escape(closing_paren) + end + + + special_escapes = + case interpreted + when :regexp_symbols + '| ' + REGEXP_SYMBOLS.source + when :words + '| \s' + end + + h[k] = + if interpreted and not delim == '#' + / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx + else + / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx + end + } + + HEREDOC_PATTERN = Hash.new { |h, k| + delim, interpreted, indented = *k + delim_pattern = Regexp.escape(delim.dup) + delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x + h[k] = + if interpreted + / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc + else + / (?= #{delim_pattern}() | \\ ) /mx + end + } + + def initialize kind, interpreted, delim, heredoc = false + if heredoc + pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] + delim = nil + else + pattern = STRING_PATTERN[ [delim, interpreted] ] + if paren = CLOSING_PAREN[delim] + delim, paren = paren, delim + paren_depth = 1 + end + end + super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial + end + end unless defined? StringState + + end + +end +end diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index 5ce8ce9..ff923fb 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ -module CodeRay
-module Scanners
-
- load :html
-
- # XML Scanner
- #
- # $Id$
- #
- # Currently this is the same scanner as Scanners::HTML.
- class XML < HTML
-
- register_for :xml
-
- end
-
-end
-end
+module CodeRay +module Scanners + + load :html + + # XML Scanner + # + # $Id$ + # + # Currently this is the same scanner as Scanners::HTML. + class XML < HTML + + register_for :xml + + end + +end +end diff --git a/lib/coderay/style.rb b/lib/coderay/style.rb index 057f8d4..c2977c5 100644 --- a/lib/coderay/style.rb +++ b/lib/coderay/style.rb @@ -1,20 +1,20 @@ -module CodeRay
-
- # This module holds the Style class and its subclasses.
- #
- # See Plugin.
- module Styles
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'styles'
-
- class Style
- extend Plugin
- plugin_host Styles
-
- DEFAULT_OPTIONS = { }
-
- end
-
- end
-
-end
+module CodeRay + + # This module holds the Style class and its subclasses. + # + # See Plugin. + module Styles + extend PluginHost + plugin_path File.dirname(__FILE__), 'styles' + + class Style + extend Plugin + plugin_host Styles + + DEFAULT_OPTIONS = { } + + end + + end + +end diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index 8b8c692..c8c62e0 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -1,322 +1,322 @@ -module CodeRay
-
- # = Tokens
- #
- # The Tokens class represents a list of tokens returnd from
- # a Scanner.
- #
- # A token is not a special object, just a two-element Array
- # consisting of
- # * the _token_ _kind_ (a Symbol representing the type of the token)
- # * the _token_ _text_ (the original source of the token in a String)
- #
- # A token looks like this:
- #
- # [:comment, '# It looks like this']
- # [:float, '3.1415926']
- # [:error, 'äöü']
- #
- # Some scanners also yield some kind of sub-tokens, represented by special
- # token texts, namely :open and :close .
- #
- # The Ruby scanner, for example, splits "a string" into:
- #
- # [
- # [:open, :string],
- # [:delimiter, '"'],
- # [:content, 'a string'],
- # [:delimiter, '"'],
- # [:close, :string]
- # ]
- #
- # Tokens is also the interface between Scanners and Encoders:
- # The input is split and saved into a Tokens object. The Encoder
- # then builds the output from this object.
- #
- # Thus, the syntax below becomes clear:
- #
- # CodeRay.scan('price = 2.59', :ruby).html
- # # the Tokens object is here -------^
- #
- # See how small it is? ;)
- #
- # Tokens gives you the power to handle pre-scanned code very easily:
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
- # that you put in your DB.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
- class Tokens < Array
-
- class << self
-
- # Convert the token to a string.
- #
- # This format is used by Encoders.Tokens.
- # It can be reverted using read_token.
- def write_token text, type
- if text.is_a? String
- "#{type}\t#{escape(text)}\n"
- else
- ":#{text}\t#{type}\t\n"
- end
- end
-
- # Read a token from the string.
- #
- # Inversion of write_token.
- #
- # TODO Test this!
- def read_token token
- type, text = token.split("\t", 2)
- if type[0] == ?:
- [text.to_sym, type[1..-1].to_sym]
- else
- [type.to_sym, unescape(text)]
- end
- end
-
- # Escapes a string for use in write_token.
- def escape text
- text.gsub(/[\n\\]/, '\\\\\&')
- end
-
- # Unescapes a string created by escape.
- def unescape text
- text.gsub(/\\[\n\\]/) { |m| m[1,1] }
- end
-
- end
-
- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
- # Iterates over all tokens.
- #
- # If a filter is given, only tokens of that kind are yielded.
- def each kind_filter = nil, &block
- unless kind_filter
- super(&block)
- else
- super() do |text, kind|
- next unless kind == kind_filter
- yield text, kind
- end
- end
- end
-
- # Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
- #
- # Example:
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
- def each_text_token
- each do |text, kind|
- next unless text.respond_to? :to_str
- yield text, kind
- end
- end
-
- # Encode the tokens using encoder.
- #
- # encoder can be
- # * a symbol like :html oder :statistic
- # * an Encoder class
- # * an Encoder object
- #
- # options are passed to the encoder.
- def encode encoder, options = {}
- unless encoder.is_a? Encoders::Encoder
- unless encoder.is_a? Class
- encoder_class = Encoders[encoder]
- end
- encoder = encoder_class.new options
- end
- encoder.encode_tokens self, options
- end
-
-
- # Turn into a string using Encoders::Text.
- #
- # +options+ are passed to the encoder if given.
- def to_s options = {}
- encode :text, options
- end
-
-
- # Redirects unknown methods to encoder calls.
- #
- # For example, if you call +tokens.html+, the HTML encoder
- # is used to highlight the tokens.
- def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
- end
-
- # Returns the tokens compressed by joining consecutive
- # tokens of the same kind.
- #
- # This can not be undone, but should yield the same output
- # in most Encoders. It basically makes the output smaller.
- #
- # Combined with dump, it saves space for the cost of time.
- #
- # If the scanner is written carefully, this is not required -
- # for example, consecutive //-comment lines could already be
- # joined in one comment token by the Scanner.
- def optimize
- print ' Tokens#optimize: before: %d - ' % size if $DEBUG
- last_kind = last_text = nil
- new = self.class.new
- each do |text, kind|
- if text.is_a? String
- if kind == last_kind
- last_text << text
- else
- new << [last_text, last_kind] if last_kind
- last_text = text
- last_kind = kind
- end
- else
- new << [last_text, last_kind] if last_kind
- last_kind = last_text = nil
- new << [text, kind]
- end
- end
- new << [last_text, last_kind] if last_kind
- print 'after: %d (%d saved = %2.0f%%)' %
- [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
- new
- end
-
- # Compact the object itself; see optimize.
- def optimize!
- replace optimize
- end
-
- # Dumps the object into a String that can be saved
- # in files or databases.
- #
- # The dump is created with Marshal.dump;
- # In addition, it is gzipped using GZip.gzip.
- #
- # The returned String object includes Undumping
- # so it has an #undump method. See Tokens.load.
- #
- # You can configure the level of compression,
- # but the default value 7 should be what you want
- # in most cases as it is a good comprimise between
- # speed and compression rate.
- #
- # See GZip module.
- def dump gzip_level = 7
- require 'coderay/helpers/gzip_simple'
- dump = Marshal.dump self
- dump = dump.gzip gzip_level
- dump.extend Undumping
- end
-
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
- def text_size
- map { |t, k| t }.join.size
- end
-
- # Include this module to give an object an #undump
- # method.
- #
- # The string returned by Tokens.dump includes Undumping.
- module Undumping
- # Calls Tokens.load with itself.
- def undump
- Tokens.load self
- end
- end
-
- # Undump the object using Marshal.load, then
- # unzip it using GZip.gunzip.
- #
- # The result is commonly a Tokens object, but
- # this is not guaranteed.
- def Tokens.load dump
- require 'coderay/helpers/gzip_simple'
- dump = dump.gunzip
- @dump = Marshal.load dump
- end
-
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |kind, text|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << [:regexp, '/\d+/']
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call token
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError, 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
- end
-
-end
+module CodeRay + + # = Tokens + # + # The Tokens class represents a list of tokens returnd from + # a Scanner. + # + # A token is not a special object, just a two-element Array + # consisting of + # * the _token_ _kind_ (a Symbol representing the type of the token) + # * the _token_ _text_ (the original source of the token in a String) + # + # A token looks like this: + # + # [:comment, '# It looks like this'] + # [:float, '3.1415926'] + # [:error, 'äöü'] + # + # Some scanners also yield some kind of sub-tokens, represented by special + # token texts, namely :open and :close . + # + # The Ruby scanner, for example, splits "a string" into: + # + # [ + # [:open, :string], + # [:delimiter, '"'], + # [:content, 'a string'], + # [:delimiter, '"'], + # [:close, :string] + # ] + # + # Tokens is also the interface between Scanners and Encoders: + # The input is split and saved into a Tokens object. The Encoder + # then builds the output from this object. + # + # Thus, the syntax below becomes clear: + # + # CodeRay.scan('price = 2.59', :ruby).html + # # the Tokens object is here -------^ + # + # See how small it is? ;) + # + # Tokens gives you the power to handle pre-scanned code very easily: + # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string + # that you put in your DB. + # + # Tokens' subclass TokenStream allows streaming to save memory. + class Tokens < Array + + class << self + + # Convert the token to a string. + # + # This format is used by Encoders.Tokens. + # It can be reverted using read_token. + def write_token text, type + if text.is_a? String + "#{type}\t#{escape(text)}\n" + else + ":#{text}\t#{type}\t\n" + end + end + + # Read a token from the string. + # + # Inversion of write_token. + # + # TODO Test this! + def read_token token + type, text = token.split("\t", 2) + if type[0] == ?: + [text.to_sym, type[1..-1].to_sym] + else + [type.to_sym, unescape(text)] + end + end + + # Escapes a string for use in write_token. + def escape text + text.gsub(/[\n\\]/, '\\\\\&') + end + + # Unescapes a string created by escape. + def unescape text + text.gsub(/\\[\n\\]/) { |m| m[1,1] } + end + + end + + # Whether the object is a TokenStream. + # + # Returns false. + def stream? + false + end + + # Iterates over all tokens. + # + # If a filter is given, only tokens of that kind are yielded. + def each kind_filter = nil, &block + unless kind_filter + super(&block) + else + super() do |text, kind| + next unless kind == kind_filter + yield text, kind + end + end + end + + # Iterates over all text tokens. + # Range tokens like [:open, :string] are left out. + # + # Example: + # tokens.each_text_token { |text, kind| text.replace html_escape(text) } + def each_text_token + each do |text, kind| + next unless text.respond_to? :to_str + yield text, kind + end + end + + # Encode the tokens using encoder. + # + # encoder can be + # * a symbol like :html oder :statistic + # * an Encoder class + # * an Encoder object + # + # options are passed to the encoder. + def encode encoder, options = {} + unless encoder.is_a? Encoders::Encoder + unless encoder.is_a? Class + encoder_class = Encoders[encoder] + end + encoder = encoder_class.new options + end + encoder.encode_tokens self, options + end + + + # Turn into a string using Encoders::Text. + # + # +options+ are passed to the encoder if given. + def to_s options = {} + encode :text, options + end + + + # Redirects unknown methods to encoder calls. + # + # For example, if you call +tokens.html+, the HTML encoder + # is used to highlight the tokens. + def method_missing meth, options = {} + Encoders[meth].new(options).encode_tokens self + end + + # Returns the tokens compressed by joining consecutive + # tokens of the same kind. + # + # This can not be undone, but should yield the same output + # in most Encoders. It basically makes the output smaller. + # + # Combined with dump, it saves space for the cost of time. + # + # If the scanner is written carefully, this is not required - + # for example, consecutive //-comment lines could already be + # joined in one comment token by the Scanner. + def optimize + print ' Tokens#optimize: before: %d - ' % size if $DEBUG + last_kind = last_text = nil + new = self.class.new + each do |text, kind| + if text.is_a? String + if kind == last_kind + last_text << text + else + new << [last_text, last_kind] if last_kind + last_text = text + last_kind = kind + end + else + new << [last_text, last_kind] if last_kind + last_kind = last_text = nil + new << [text, kind] + end + end + new << [last_text, last_kind] if last_kind + print 'after: %d (%d saved = %2.0f%%)' % + [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG + new + end + + # Compact the object itself; see optimize. + def optimize! + replace optimize + end + + # Dumps the object into a String that can be saved + # in files or databases. + # + # The dump is created with Marshal.dump; + # In addition, it is gzipped using GZip.gzip. + # + # The returned String object includes Undumping + # so it has an #undump method. See Tokens.load. + # + # You can configure the level of compression, + # but the default value 7 should be what you want + # in most cases as it is a good comprimise between + # speed and compression rate. + # + # See GZip module. + def dump gzip_level = 7 + require 'coderay/helpers/gzip_simple' + dump = Marshal.dump self + dump = dump.gzip gzip_level + dump.extend Undumping + end + + # The total size of the tokens. + # Should be equal to the input size before + # scanning. + def text_size + map { |t, k| t }.join.size + end + + # Include this module to give an object an #undump + # method. + # + # The string returned by Tokens.dump includes Undumping. + module Undumping + # Calls Tokens.load with itself. + def undump + Tokens.load self + end + end + + # Undump the object using Marshal.load, then + # unzip it using GZip.gunzip. + # + # The result is commonly a Tokens object, but + # this is not guaranteed. + def Tokens.load dump + require 'coderay/helpers/gzip_simple' + dump = dump.gunzip + @dump = Marshal.load dump + end + + end + + + # = TokenStream + # + # The TokenStream class is a fake Array without elements. + # + # It redirects the method << to a block given at creation. + # + # This allows scanners and Encoders to use streaming (no + # tokens are saved, the input is highlighted the same time it + # is scanned) with the same code. + # + # See CodeRay.encode_stream and CodeRay.scan_stream + class TokenStream < Tokens + + # Whether the object is a TokenStream. + # + # Returns true. + def stream? + true + end + + # The Array is empty, but size counts the tokens given by <<. + attr_reader :size + + # Creates a new TokenStream that calls +block+ whenever + # its << method is called. + # + # Example: + # + # require 'coderay' + # + # token_stream = CodeRay::TokenStream.new do |kind, text| + # puts 'kind: %s, text size: %d.' % [kind, text.size] + # end + # + # token_stream << [:regexp, '/\d+/'] + # #-> kind: rexpexp, text size: 5. + # + def initialize &block + raise ArgumentError, 'Block expected for streaming.' unless block + @callback = block + @size = 0 + end + + # Calls +block+ with +token+ and increments size. + # + # Returns self. + def << token + @callback.call token + @size += 1 + self + end + + # This method is not implemented due to speed reasons. Use Tokens. + def text_size + raise NotImplementedError, 'This method is not implemented due to speed reasons.' + end + + # A TokenStream cannot be dumped. Use Tokens. + def dump + raise NotImplementedError, 'A TokenStream cannot be dumped.' + end + + # A TokenStream cannot be optimized. Use Tokens. + def optimize + raise NotImplementedError, 'A TokenStream cannot be optimized.' + end + + end + +end |