summaryrefslogtreecommitdiff
path: root/etc/speedup/direct-stream.rb
diff options
context:
space:
mode:
authormurphy <murphy@rubychan.de>2010-01-06 22:41:34 +0000
committermurphy <murphy@rubychan.de>2010-01-06 22:41:34 +0000
commit17592a6d37cb9f75d41e78affc2d43257dd18d94 (patch)
tree4fa03b3b76eaefd8335faf3e0576c132aeeb21b6 /etc/speedup/direct-stream.rb
parent9ec711fc9ce464e28fb5189ce034c12cf30687b2 (diff)
downloadcoderay-17592a6d37cb9f75d41e78affc2d43257dd18d94.tar.gz
Added some benchmark experiments for highlighting without Tokens.
Diffstat (limited to 'etc/speedup/direct-stream.rb')
-rw-r--r--etc/speedup/direct-stream.rb174
1 files changed, 174 insertions, 0 deletions
diff --git a/etc/speedup/direct-stream.rb b/etc/speedup/direct-stream.rb
new file mode 100644
index 0000000..3c15511
--- /dev/null
+++ b/etc/speedup/direct-stream.rb
@@ -0,0 +1,174 @@
+require 'strscan'
+require 'benchmark'
+
# Toy tokenizer used by the benchmark: walks the input with StringScanner
# and reports every token to an encoder object instead of building an
# intermediate structure itself.
#
# The encoder must respond to +text_token(text, kind)+, +open(kind)+ and
# +close(kind)+.
class Scanner < StringScanner

  # Scans the rest of the input, sending each token to +encoder+.
  #
  # encoder - token receiver; defaults to a fresh Tokens list.
  #
  # Returns the encoder that was passed in (or the default one).
  # Raises RuntimeError when the input contains text no rule matches.
  def tokenize(encoder = Tokens.new)
    scan_tokens encoder
    encoder
  end

protected

  # Main scanning loop: tries each rule in order at the current position
  # until the end of the string is reached.
  def scan_tokens(encoder)
    until eos?
      if matched = scan(/\s+/)
        encoder.text_token matched, :space
      elsif matched = scan(/!/)
        encoder.text_token matched, :not_going_to_happen
      elsif matched = scan(/=/)
        encoder.text_token matched, :not_going_to_happen
      elsif matched = scan(/%/)
        encoder.text_token matched, :not_going_to_happen
      elsif matched = scan(/\w+/)
        encoder.text_token matched, :word
      elsif matched = scan(/[,.]/)
        encoder.text_token matched, :op
      elsif scan(/\(/)
        encoder.open :par
      elsif scan(/\)/)
        encoder.close :par
      else
        # No rule matched; say where scanning got stuck so the offending
        # input can be located.
        raise "Unexpected input at position #{pos}: #{peek(10).inspect}"
      end
    end
  end

end
+
+
# Flat token list: every token is stored as two consecutive array
# elements, the content (or :open / :close) followed by its kind.
class Tokens < Array
  # All three token entry points simply append their arguments.
  alias_method :token, :push
  alias_method :text_token, :push
  alias_method :block_token, :push

  # Records the start of a group of the given kind.
  def open(kind)
    push(:open, kind)
  end

  # Records the end of a group of the given kind.
  def close(kind)
    push(:close, kind)
  end
end
+
+
# Renders tokens into a compact debug-style string.
#
# Text tokens become <tt>kind(text)</tt> (with ")" and "\" escaped by a
# backslash), whitespace tokens are copied verbatim, and groups are
# written as <tt>kind<</tt> ... <tt>></tt>.
#
# Tokens can be supplied either as a flat list of content/kind pairs
# (#encode_tokens) or pushed directly by a scanner (#encode_stream).
class Encoder

  # Starts a fresh encoding run with an empty output buffer and no open groups.
  def setup
    @out = ''
    @opened = []
  end

  # Closes every group that is still open and returns the output string.
  def finish
    # #close pops the stack itself, so only peek at the top here; popping
    # in both places would skip every second unclosed group.
    close @opened.last until @opened.empty?
    @out
  end

  # Encodes a flat list of alternating content and kind items.
  #
  # Returns the encoded String.
  # Raises RuntimeError on malformed token lists.
  def encode_tokens(tokens)
    setup
    compile tokens
    finish
  end

  # Encodes by letting +scanner+ push its tokens straight into this
  # encoder; the scanner must respond to +tokenize(encoder)+.
  #
  # Returns the encoded String.
  def encode_stream(scanner)
    setup
    scanner.tokenize self
    finish
  end

  # Generic entry point: routes String content to #text_token and Symbol
  # content to #block_token.
  #
  # Raises RuntimeError for any other content type.
  def token(content, kind)
    if content.is_a? ::String
      text_token content, kind
    elsif content.is_a? ::Symbol
      block_token content, kind
    else
      raise 'Unknown token content type: %p' % [content]
    end
  end

  # Appends a text token to the output. Tokens of kind :space are copied
  # as-is; everything else is wrapped as <tt>kind(text)</tt>.
  def text_token(text, kind)
    @out <<
      if kind == :space
        text
      else
        # Non-destructive gsub: the token text belongs to the caller and
        # must not be modified, otherwise encoding the same tokens a
        # second time would escape them twice.
        escaped = text.gsub(/[)\\]/) { |char| "\\#{char}" } # escape ) and \
        "#{kind}(#{escaped})"
      end
  end

  # Handles an :open or :close action for a group of the given kind.
  #
  # Raises RuntimeError for any other action.
  def block_token(action, kind)
    case action
    when :open
      open kind
    when :close
      close kind
    else
      raise 'Unknown block action: %p' % [action]
    end
  end

  # Opens a group: remembers its kind and writes <tt>kind<</tt>.
  def open(kind)
    @opened << kind
    @out << "#{kind}<"
  end

  # Closes the innermost group and writes <tt>></tt>. The +kind+ argument
  # is not checked against the group actually being closed.
  def close(kind)
    @opened.pop
    @out << '>'
  end

protected

  # Walks a flat token list in which each content item is followed by
  # its kind, and dispatches every pair to the matching handler.
  def compile(tokens)
    content = nil # pending first half of a pair, nil when between pairs
    tokens.each do |item|
      if content
        case content
        when ::String
          text_token content, item
        when :open
          open item
        when :close
          close item
        when ::Symbol
          block_token content, item
        else
          raise 'Unknown token content type: %p' % [content]
        end
        content = nil
      else
        content = item
      end
    end
    # A leftover content item means the list ended in the middle of a pair.
    raise 'Token list ended after %p without a kind' % [content] if content
  end

end
+
# Benchmark driver. The first command line argument is the exponent of
# the number of input lines (10 ** exponent, exponent defaults to 5).
N = (10 ** (ARGV.first || 5).to_i)
code = " alpha, beta, (gamma).\n" * N
scanner = Scanner.new code
encoder = Encoder.new

# Prints one result line: elapsed seconds and throughput in kTok/s.
report = lambda do |label, seconds, token_count|
  puts "#{label}: %0.2fs -- %0.0f kTok/s" % [seconds, token_count / seconds / 1000]
end

# Two-step variant: scan into a Tokens list first, then encode that list.
tokens = nil
time_scanning = Benchmark.realtime { tokens = scanner.tokenize }
# The list holds two entries per token.
report.call 'Scanning', time_scanning, tokens.size / 2

time_encoding = Benchmark.realtime { encoder.encode_tokens tokens }
report.call 'Encoding', time_encoding, tokens.size / 2

report.call 'Together', time_scanning + time_encoding, tokens.size / 2

# Direct variant: rewind the scanner and let it feed the encoder without
# an intermediate token list.
scanner.reset
time_streaming = Benchmark.realtime { encoder.encode_stream scanner }
# Each input line yields 11 tokens (the newline merges with the next
# line's leading space), plus the final newline.
report.call 'Scanning + Encoding', time_streaming, N * 11 + 1