diff options
author | murphy <murphy@rubychan.de> | 2010-01-06 22:41:34 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2010-01-06 22:41:34 +0000 |
commit | 17592a6d37cb9f75d41e78affc2d43257dd18d94 (patch) | |
tree | 4fa03b3b76eaefd8335faf3e0576c132aeeb21b6 /etc/speedup/direct-stream.rb | |
parent | 9ec711fc9ce464e28fb5189ce034c12cf30687b2 (diff) | |
download | coderay-17592a6d37cb9f75d41e78affc2d43257dd18d94.tar.gz |
Added some benchmark experiments for highlighting without Tokens.
Diffstat (limited to 'etc/speedup/direct-stream.rb')
-rw-r--r-- | etc/speedup/direct-stream.rb | 174 |
1 files changed, 174 insertions, 0 deletions
# etc/speedup/direct-stream.rb
# (reconstructed from the diff that adds this file: new file, index 0000000..3c15511)
#
# Benchmark experiment: highlighting with an intermediate Tokens array
# (scan first, encode afterwards) versus streaming the scanner's output
# directly into the encoder without building Tokens.
#
# Usage: ruby direct-stream.rb [EXPONENT]
# The sample line is repeated 10 ** EXPONENT times (EXPONENT defaults to 5).

require 'strscan'
require 'benchmark'

# Minimal scanner for the benchmark input. It reports what it finds to an
# "encoder": any object responding to text_token(text, kind), open(kind)
# and close(kind). Both Tokens and Encoder below satisfy that interface.
class Scanner < StringScanner

  def initialize(code)
    super(code)
  end

  # Scans the remaining input into +encoder+ and returns the encoder.
  # With no argument the tokens are collected into a fresh Tokens array.
  def tokenize(encoder = Tokens.new)
    scan_tokens encoder
    encoder
  end

  protected

  # Feeds one event per match to +encoder+ until the input is exhausted.
  # Raises RuntimeError on a character that no rule matches.
  def scan_tokens(encoder)
    until eos?
      if (matched = scan(/\s+/))
        encoder.text_token matched, :space
      # The next three rules never match the benchmark input; they only add
      # failed scan attempts in front of the rules that do match.
      elsif (matched = scan(/!/))
        encoder.text_token matched, :not_going_to_happen
      elsif (matched = scan(/=/))
        encoder.text_token matched, :not_going_to_happen
      elsif (matched = scan(/%/))
        encoder.text_token matched, :not_going_to_happen
      elsif (matched = scan(/\w+/))
        encoder.text_token matched, :word
      elsif (matched = scan(/[,.]/))
        encoder.text_token matched, :op
      elsif scan(/\(/)
        encoder.open :par
      elsif scan(/\)/)
        encoder.close :par
      else
        # Failing loudly also prevents an endless loop on unmatched input.
        raise 'Unexpected input at position %d: %p' % [pos, peek(10)]
      end
    end
  end

end


# Flat token store: every token occupies two consecutive slots,
# [content, kind]. Block events are stored as [:open, kind] / [:close, kind].
class Tokens < Array
  alias token push
  alias text_token push
  alias block_token push

  def open(kind)
    push :open, kind
  end

  def close(kind)
    push :close, kind
  end
end


# Renders tokens into a plain string: "kind(text)" for text tokens (space
# tokens are emitted verbatim), "kind<" when a group opens and ">" when it
# closes.
class Encoder

  # Resets the output buffer and the stack of currently open groups.
  def setup
    @out = ''.dup # dup keeps the buffer mutable even if literals are frozen
    @opened = []
  end

  # Closes every group that is still open and returns the encoded string.
  def finish
    # close pops the stack itself, so only peek at the top here; popping in
    # both places would skip every second group.
    close @opened.last until @opened.empty?
    @out
  end

  # Encodes a flat [content, kind, content, kind, ...] token list.
  def encode_tokens(tokens)
    setup
    compile tokens
    finish
  end

  # Encodes directly from +scanner+, which calls back into this encoder.
  def encode_stream(scanner)
    setup
    scanner.tokenize self
    finish
  end

  # Dispatches on the content type: Strings are text tokens, Symbols are
  # block actions. Raises RuntimeError for anything else.
  def token(content, kind)
    case content
    when ::String
      text_token content, kind
    when ::Symbol
      block_token content, kind
    else
      raise 'Unknown token content type: %p' % [content]
    end
  end

  # Appends a text token to the output. +text+ is left unmodified.
  def text_token(text, kind)
    @out <<
      if kind == :space
        text
      else
        # Non-destructive gsub: the token strings belong to the caller, and
        # escaping them in place would double-escape on a second encoding.
        escaped = text.gsub(/[)\\]/, '\\\\\0') # escape ) and \
        "#{kind}(#{escaped})"
      end
  end

  # Handles a block action (:open or :close). Raises RuntimeError otherwise.
  def block_token(action, kind)
    case action
    when :open
      open kind
    when :close
      close kind
    else
      raise 'Unknown block action: %p' % [action]
    end
  end

  def open(kind)
    @opened << kind
    @out << "#{kind}<"
  end

  # Closes the innermost open group; +kind+ is not checked against it.
  def close(kind)
    @opened.pop
    @out << '>'
  end

  protected

  # Walks the flat token list pairwise: the first item of each pair is held
  # in +content+ until its kind arrives as the next item.
  def compile(tokens)
    content = nil
    tokens.each do |item|
      if content
        case content
        when ::String
          text_token content, item
        when :open
          open item
        when :close
          close item
        when ::Symbol
          block_token content, item
        else
          raise 'Unknown token content type: %p' % [content]
        end
        content = nil
      else
        content = item
      end
    end
    raise 'Incomplete token list: %p has no kind' % [content] if content
  end

end

N = (10 ** (ARGV.first || 5).to_i)
code = " alpha, beta, (gamma).\n" * N
scanner = Scanner.new code
encoder = Encoder.new

tokens = nil
time_scanning = Benchmark.realtime do
  tokens = scanner.tokenize
end
# Each token takes two slots in the Tokens array, hence size / 2.
puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [time_scanning, tokens.size / 2 / time_scanning / 1000]

time_encoding = Benchmark.realtime do
  encoder.encode_tokens tokens
end
puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [time_encoding, tokens.size / 2 / time_encoding / 1000]

time = time_scanning + time_encoding
puts 'Together: %0.2fs -- %0.0f kTok/s' % [time, tokens.size / 2 / time / 1000]

scanner.reset
time = Benchmark.realtime do
  encoder.encode_stream scanner
end
# No Tokens array exists in streaming mode; the sample line yields 11 tokens
# per repetition plus one trailing whitespace token.
puts 'Scanning + Encoding: %0.2fs -- %0.0f kTok/s' % [time, (N * 11 + 1) / time / 1000]