require 'set'
module CodeRay
module Encoders
  # = HTML Encoder
  #
  # This is CodeRay's most important highlighter:
  # It provides safe, fast XHTML generation and CSS support.
  #
  # == Usage
  #
  #  require 'coderay'
  #  puts CodeRay.scan('Some /code/', :ruby).html  #-> a HTML page
  #  puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span)
  #  #-> <span class="CodeRay"><span class="co">Some</span> /code/</span>
  #  puts CodeRay.scan('Some /code/', :ruby).span  #-> the same
  #  
  #  puts CodeRay.scan('Some code', :ruby).html(
  #    :wrap => nil,
  #    :line_numbers => :inline,
  #    :css => :style
  #  )
  #  #-> <span class="no">1</span>  <span style="color:#036; font-weight:bold;">Some</span> code
  #
  # == Options
  #
  # === :tab_width
  # Convert \t characters to +n+ spaces (a number.)
  # 
  # Default: 8
  #
  # === :css
  # How to include the styles; can be :class or :style.
  #
  # Default: :class
  #
  # === :wrap
  # Wrap in :page, :div, :span or nil.
  #
  # You can also use Encoders::Div and Encoders::Span.
  #
  # Default: nil
  #
  # === :title
  # 
  # The title of the HTML page (works only when :wrap is set to :page.)
  #
  # Default: 'CodeRay output'
  #
  # === :line_numbers
  # Include line numbers in :table, :inline, or nil (no line numbers)
  #
  # Default: nil
  #
  # === :line_number_anchors
  # Adds anchors and links to the line numbers. Can be false (off), true (on),
  # or a prefix string that will be prepended to the anchor name.
  #
  # The prefix must consist only of letters, digits, and underscores.
  #
  # Default: "n" (anchors are enabled and use "n" as the prefix)
  #
  # === :line_number_start
  # Where to start with line number counting.
  #
  # Default: 1
  #
  # === :bold_every
  # Make every +n+-th number appear bold.
  #
  # Default: 10
  #
  # === :highlight_lines
  # 
  # Highlights certain line numbers.
  # Can be any Enumerable, typically just an Array or Range, of numbers.
  # 
  # Bolding is deactivated when :highlight_lines is set. It only makes sense
  # in combination with :line_numbers.
  #
  # Default: nil
  #
  # === :hint
  # Include some information into the output using the title attribute.
  # Can be :info (show token kind on mouse-over), :info_long (with full path)
  # or :debug (via inspect).
  #
  # Default: false
  class HTML < Encoder
    register_for :html
    FILE_EXTENSION = 'html'
    # Option values that apply when the caller does not supply them.
    DEFAULT_OPTIONS = {
      # Number of spaces each tab character is expanded to.
      :tab_width => 8,
      # How styles are attached to the output: :class or :style.
      :css => :class,
      # Stylesheet identifier handed to CSS.new in #setup.
      :style => :alpha,
      # Outer wrapper: :page, :div, :span or nil for none.
      :wrap => nil,
      # Page title; only meaningful when :wrap is :page.
      :title => 'CodeRay output',
      # Line numbering mode: :table, :inline or nil for none.
      :line_numbers => nil,
      # false (off), true (on) or a prefix string for the anchor names.
      :line_number_anchors => 'n',
      # Number given to the first line.
      :line_number_start => 1,
      # Every n-th line number is rendered bold.
      :bold_every => 10,
      # Enumerable of line numbers to highlight, or nil.
      :highlight_lines => nil,
      # Title-attribute hints: :info, :info_long, :debug or false for none.
      :hint => false,
    }
    
    # TODO: Make Plugin use autoload, too.
    helper :output, :css
    autoload :Numbering, 'coderay/encoders/html/numbering'
    attr_reader :css
  protected
    # Replacement text for every character that must not reach the generated
    # markup verbatim, keyed by the one-character String it replaces.
    HTML_ESCAPE = {  #:nodoc:
      '&' => '&amp;',
      '"' => '&quot;',
      '>' => '&gt;',
      '<' => '&lt;',
    }
    # This was to prevent illegal HTML.
    # Strange chars should still be avoided in codes.
    # NOTE(review): the range holds Integers; on Rubies where ?\n, ?\t and ?\s
    # are Strings the subtraction removes nothing, so "\t" and "\n" also get
    # the blank replacement here. #setup overwrites the "\t" entry on its own
    # copy and "\n" is not matched by HTML_ESCAPE_PATTERN - confirm that no
    # other user of this table relies on either entry before changing it.
    evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
    evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
    #ansi_chars = Array(0x7f..0xff)
    #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
    # \x9 (\t) and \xA (\n) not included
    #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
    # Matches every character that needs a lookup in the escape table: tab,
    # the four markup characters and the control characters other than \n.
    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
    # Memoizing lookup from a token kind to a human-readable label.
    # Underscores become spaces and every word is capitalized; the label for
    # :pre_constant is spelled out by hand. Each label is stored in the hash
    # the first time it is asked for.
    TOKEN_KIND_TO_INFO = Hash.new do |labels, token_kind|
      labels[token_kind] =
        if :pre_constant == token_kind
          'Predefined constant'
        else
          spaced = token_kind.to_s.tr('_', ' ')
          spaced.gsub(/\b\w/) { |initial| initial.capitalize }
        end
    end
    # Token kinds grouped as "transparent"; within the visible code the set is
    # only mentioned by the commented-out logic in token_path_to_hint.
    TRANSPARENT_TOKEN_KINDS = Set.new [
      :delimiter,
      :modifier,
      :content,
      :escape,
      :inline_delimiter,
    ]
    # Generate a hint about the given +kinds+ in a +hint+ style.
    #
    # +hint+ may be :info, :info_long or :debug; any other value produces no
    # hint at all.
    #
    # +kinds+ is either a single token kind or an Array of kinds. The callers
    # in this class pass the token's own kind first, followed by the kinds of
    # the currently open groups.
    #
    # Returns a title attribute with a leading space, ready to be placed
    # inside an opening tag, or an empty String when there is no hint.
    def self.token_path_to_hint hint, kinds
      # FIXME: TRANSPARENT_TOKEN_KINDS?
      # if TRANSPARENT_TOKEN_KINDS.include? kinds.first
      #   kinds = kinds[1..-1]
      # else
      #   kinds = kinds[1..-1] + kinds.first
      # end
      title =
        case hint
        when :info
          TOKEN_KIND_TO_INFO[Array(kinds).first]
        when :info_long
          # A bare Symbol arrives here whenever no group is open, so coerce
          # it exactly like the :info branch does instead of calling
          # Symbol#map.
          Array(kinds).map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
        when :debug
          kinds.inspect
        end
      title ? " title=\"#{title}\"" : ''
    end
    # Prepares the per-run state of the encoder.
    #
    # Reads :tab_width, :style, :hint and :css from +options+ and builds
    # @css_style, a memoizing Hash that maps a token kind (a Symbol, or an
    # Array starting with the token's kind) to the opening tag for that
    # token, or to nil when the token is emitted without a tag.
    #
    # Raises ArgumentError for an unknown :hint or :css value.
    def setup options
      super
      
      # Work on a copy so the tab replacement can follow :tab_width without
      # touching the shared table.
      @HTML_ESCAPE = HTML_ESCAPE.dup
      @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
      
      @opened = []
      @last_opened = nil
      @css = CSS.new options[:style]
      
      hint = options[:hint]
      if hint && ![:debug, :info, :info_long].include?(hint)
        # Adjacent literals keep the message on one line; a backslash-newline
        # inside a single literal would drag the indentation into the text.
        raise ArgumentError,
          'Unknown value %p for :hint; ' \
          'expected :info, :info_long, :debug, false, or nil.' % [hint]
      end
      
      css_classes = Tokens::AbbreviationForKind
      case options[:css]
      when :class
        @css_style = Hash.new do |h, k|
          kind = k.is_a?(Symbol) ? k : k.first
          # Array keys are stored as a copy of the Array that was looked up.
          h[k.is_a?(Symbol) ? k : k.dup] =
            # :space tokens, and kinds with neither a hint nor a class
            # abbreviation, get no tag (nil).
            if kind != :space && (hint || css_classes[kind])
              title = HTML.token_path_to_hint hint, k if hint
              css_class = css_classes[kind]
              "<span#{title}#{" class=\"#{css_class}\"" if css_class}>"
            end
        end
      when :style
        @css_style = Hash.new do |h, k|
          kind = k.is_a?(Symbol) ? k : k.first
          h[k.is_a?(Symbol) ? k : k.dup] =
            if kind != :space && (hint || css_classes[kind])
              title = HTML.token_path_to_hint hint, k if hint
              # The inline style is resolved from the abbreviations of the
              # whole kind path.
              style = @css.get_style Array(k).map { |c| css_classes[c] }
              "<span#{title}#{" style=\"#{style}\"" if style}>"
            end
        end
      else
        raise ArgumentError, "Unknown value %p for :css." % options[:css]
      end
    end
    # Completes the output after the last token.
    #
    # Any group that is still open is reported with a warning and closed.
    # The output is then extended with Output, given the CSS object,
    # numbered when :line_numbers is set, wrapped according to :wrap and
    # titled with :title before control passes to the superclass.
    def finish options
      unless @opened.empty?
        warn '%d tokens still open: %p' % [@opened.size, @opened]
        # Emit one closing tag per leftover entry so the markup stays balanced.
        @out << '</span>' while @opened.pop
        @last_opened = nil
      end
      
      @out.extend Output
      @out.css = @css
      if options[:line_numbers]
        Numbering.number! @out, options[:line_numbers], options
      end
      @out.wrap! options[:wrap]
      @out.apply_title! options[:title]
      
      super
    end
    
  public
    
    # Appends one token to the output.
    #
    # +text+ is escaped through the per-instance escape table first. When
    # @css_style yields an opening tag for +kind+ (looked up together with
    # the open groups once @last_opened is set), the escaped text is wrapped
    # in that tag and a closing tag; otherwise it is appended as is.
    def text_token text, kind
      # Only allocate a new String when something actually needs escaping.
      if text =~ HTML_ESCAPE_PATTERN
        text = text.gsub(HTML_ESCAPE_PATTERN) { |m| @HTML_ESCAPE[m] }
      end
      if style = @css_style[@last_opened ? [kind, *@opened] : kind]
        @out << style << text << '</span>'
      else
        @out << text
      end
    end
    
    # token groups, eg. strings
    #
    # Opens a group of +kind+: writes the opening tag that @css_style yields
    # for it, or a bare span when there is none, so that the closing tag
    # written by end_group always has a partner. The kind is pushed onto
    # @opened; @last_opened is only tracked when :css is :style.
    def begin_group kind
      @out << (@css_style[@last_opened ? [kind, *@opened] : kind] || '<span>')
      @opened << kind
      @last_opened = kind if @options[:css] == :style
    end
    
    # Closes the innermost open group.
    #
    # With $CODERAY_DEBUG set, a warning is printed when +kind+ is not the
    # innermost open kind. Whatever is innermost is popped and closed
    # regardless; nothing is written when no group is open.
    def end_group kind
      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
        # @opened has no sentinel element, so report the whole stack.
        warn 'Malformed token stream: Trying to close a token (%p) ' \
          'that is not open. Open are: %p.' % [kind, @opened]
      end
      if @opened.pop
        @out << '</span>'
        @last_opened = @opened.last if @last_opened
      end
    end
    
    # whole lines to be highlighted, eg. a deleted line in a diff
    #
    # Opens a line container of +kind+. The opening tag that @css_style
    # yields for the kind is reused with its element name switched to div;
    # a bare div is written when there is no such tag. The kind is pushed
    # onto @opened; @last_opened is only tracked when :css is :style.
    def begin_line kind
      if style = @css_style[@last_opened ? [kind, *@opened] : kind]
        @out << style.sub('<span', '<div')
      else
        @out << '<div>'
      end
      @opened << kind
      @last_opened = kind if @options[:css] == :style
    end
    
    def end_line kind
      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
        warn 'Malformed token stream: Trying to close a line (%p) ' \
          'that is not open. Open are: %p.' % [kind, @opened[1..-1]]
      end
      if @opened.pop
        @out << '