# path: root/etc/speedup/current.rb
# blob: e98d0e2d6cf7d0d3a627c927209bcfd832a427ab
require 'strscan'
require 'benchmark'

# A StringScanner that splits its input into [content, kind] token pairs.
#
# Token shapes produced by scan_tokens:
# * [String, :space]  for runs of whitespace
# * [String, :word]   for runs of word characters
# * [String, :op]     for a single "," or "."
# * [:open, :par] / [:close, :par] for "(" and ")"
# * [String, :not_going_to_happen] for a single "!", "=" or "%"
class Scanner < StringScanner
  
  # code - the String to tokenize.
  def initialize code
    super code
    @tokens = Tokens.new
  end
  
  # Scans the remaining input and returns the Tokens collection created in
  # initialize. The same collection object is returned on every call.
  #
  # Raises RuntimeError if the input contains something no rule matches.
  def tokenize
    scan_tokens @tokens
    @tokens
  end
  
protected
  
  # Appends one token per match to +tokens+ until the end of the input.
  # The rules are tried in the order written; the first match wins.
  def scan_tokens tokens
    until eos?
      if matched = scan(/\s+/)
        tokens << [matched, :space]
      elsif matched = scan(/!/)
        tokens << [matched, :not_going_to_happen]
      elsif matched = scan(/=/)  #/
        tokens << [matched, :not_going_to_happen]
      elsif matched = scan(/%/)
        tokens << [matched, :not_going_to_happen]
      elsif matched = scan(/\w+/)
        tokens << [matched, :word]
      elsif matched = scan(/[,.]/)
        tokens << [matched, :op]
      elsif scan(/\(/)
        tokens << [:open, :par]
      elsif scan(/\)/)
        tokens << [:close, :par]
      else
        # A bare `raise` here would only report "unhandled exception";
        # say what could not be scanned and where (pos is a byte offset).
        raise 'Unexpected input %p at byte offset %d' % [peek(1), pos]
      end
    end
  end
  
end


# Token container: a plain Array subclass with no behaviour of its own.
# Scanner fills it with [content, kind] pairs.
Tokens = Class.new(Array)


# Serializes a stream of [content, kind] token pairs into one String.
#
# * String content with kind :space is emitted verbatim.
# * Any other String content is emitted as "kind(text)", where every ")"
#   and "\" inside text is prefixed with a backslash.
# * :open is emitted as "kind<" and :close as ">".
class Encoder
  
  # Encodes +tokens+ (a collection of [content, kind] pairs) and returns
  # the encoded String. The token strings themselves are not modified, so
  # the same tokens can be encoded repeatedly with the same result.
  #
  # Raises RuntimeError if a token's content is neither a String nor a Symbol.
  def encode_tokens tokens
    # Start from a fresh, mutable buffer: a bare '' literal cannot be
    # appended to when string literals are frozen.
    @out = ''.dup
    compile tokens
    @out
  end
  
protected
  
  # Feeds every token to #token. The iteration form is chosen once, when
  # the class body is evaluated, based on the running interpreter
  # (presumably for speed -- the two variants are otherwise equivalent).
  if RUBY_VERSION >= '1.9' || defined?(JRUBY_VERSION)
    def compile tokens
      for text, kind in tokens
        token text, kind
      end
    end
  else
    def compile tokens
      tokens.each(&method(:token).to_proc)
    end
  end
  
  # Encodes a single token and appends the result to the output buffer.
  # :open and :close are matched before the generic Symbol case, so
  # block_token only ever receives other symbols.
  def token content, kind
    encoded_token =
      case content
      when ::String
        text_token content, kind
      when :open
        open kind
      when :close
        close kind
      when ::Symbol
        block_token content, kind
      else
        raise 'Unknown token content type: %p' % [content]
      end
    @out << encoded_token
  end
  
  # Returns the encoded form of a text token: the text itself for :space,
  # otherwise "kind(escaped text)".
  def text_token text, kind
    if kind == :space
      text
    else
      # Use the non-destructive gsub: gsub! would rewrite the caller's token
      # in place (double-escaping it on the next encode) and would raise on
      # frozen strings.
      escaped = text.gsub(/[)\\]/, '\\\\\0')  # escape ) and \
      "#{kind}(#{escaped})"
    end
  end
  
  # Encodes a Symbol action. Returns nil for any action other than :open
  # and :close; #token then fails when appending that nil to the buffer.
  def block_token action, kind
    case action
    when :open
      open kind
    when :close
      close kind
    end
  end
  
  # Opening marker for a group of the given kind.
  def open kind
    "#{kind}<"
  end
  
  # Closing marker; it is the same for every kind.
  def close kind
    '>'
  end
end

# Benchmark driver.
#
# The sample line is repeated N times, where N is 10 raised to the first
# command-line argument (5 when no argument is given). Scanning and encoding
# are timed separately; each report shows elapsed seconds and throughput in
# thousands of tokens per second.
N = 10 ** (ARGV.first || 5).to_i
source = "  alpha, beta, (gamma).\n" * N
lexer = Scanner.new(source)
emitter = Encoder.new

# Phase 1: tokenize the whole input.
token_list = nil
scan_seconds = Benchmark.realtime { token_list = lexer.tokenize }
puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [scan_seconds, token_list.size / scan_seconds / 1000]

# Phase 2: encode the tokens; the encoded string itself is discarded.
encode_seconds = Benchmark.realtime { emitter.encode_tokens(token_list).size }
puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [encode_seconds, token_list.size / encode_seconds / 1000]

# Combined figure over both phases.
total_seconds = scan_seconds + encode_seconds
puts 'Together: %0.2fs -- %0.0f kTok/s' % [total_seconds, token_list.size / total_seconds / 1000]