diff options
-rw-r--r-- | etc/coderay-lib.tmproj | 83 | ||||
-rw-r--r-- | etc/speedup/current.rb | 132 | ||||
-rw-r--r-- | etc/speedup/direct-stream.rb | 174 |
3 files changed, 364 insertions, 25 deletions
diff --git a/etc/coderay-lib.tmproj b/etc/coderay-lib.tmproj index 0c3eaa3..53b50b3 100644 --- a/etc/coderay-lib.tmproj +++ b/etc/coderay-lib.tmproj @@ -2,11 +2,11 @@ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> <plist version="1.0"> <dict> + <key>currentDocument</key> + <string>speedup/current.rb</string> <key>documents</key> <array> <dict> - <key>expanded</key> - <true/> <key>name</key> <string>lib</string> <key>regexFolderFilter</key> @@ -32,9 +32,7 @@ <key>filename</key> <string>../Changes.textile</string> <key>lastUsed</key> - <date>2010-01-01T06:18:17Z</date> - <key>selected</key> - <true/> + <date>2010-01-01T07:33:23Z</date> </dict> <dict> <key>filename</key> @@ -51,6 +49,8 @@ <string>../ftp.yaml</string> </dict> <dict> + <key>expanded</key> + <true/> <key>name</key> <string>etc</string> <key>regexFolderFilter</key> @@ -97,8 +97,6 @@ <date>2010-01-01T05:57:27Z</date> </dict> <dict> - <key>expanded</key> - <true/> <key>name</key> <string>functional</string> <key>regexFolderFilter</key> @@ -110,13 +108,13 @@ <key>filename</key> <string>../test/scanners/coderay_suite.rb</string> <key>lastUsed</key> - <date>2010-01-01T06:09:10Z</date> + <date>2010-01-06T09:17:36Z</date> </dict> <dict> <key>filename</key> <string>../test/scanners/suite.rb</string> <key>lastUsed</key> - <date>2010-01-01T05:50:18Z</date> + <date>2010-01-04T01:36:09Z</date> </dict> <dict> <key>filename</key> @@ -126,7 +124,7 @@ </dict> </array> <key>fileHierarchyDrawerWidth</key> - <integer>200</integer> + <integer>151</integer> <key>metaData</key> <dict> <key>../lib/coderay/scanners/delphi.rb</key> @@ -187,20 +185,6 @@ <key>firstVisibleLine</key> <integer>0</integer> </dict> - <key>../lib/coderay/scanners/json.rb</key> - <dict> - <key>caret</key> - <dict> - <key>column</key> - <integer>25</integer> - <key>line</key> - <integer>15</integer> - </dict> - <key>firstVisibleColumn</key> - <integer>0</integer> - <key>firstVisibleLine</key> - <integer>0</integer> - </dict> <key>../lib/coderay/scanners/php.rb</key> <dict> <key>caret</key> @@ -289,10 +273,59 @@ <integer>76</integer> </dict> </dict> + <key>speedup/current.rb</key> + <dict> + <key>caret</key> + <dict> + <key>column</key> + <integer>38</integer> + <key>line</key> + <integer>115</integer> + </dict> + <key>firstVisibleColumn</key> + <integer>0</integer> + <key>firstVisibleLine</key> + <integer>95</integer> + </dict> + <key>speedup/direct-stream.rb</key> + <dict> + <key>caret</key> + <dict> + <key>column</key> + <integer>0</integer> + <key>line</key> + <integer>151</integer> + </dict> + <key>columnSelection</key> + <false/> + <key>firstVisibleColumn</key> + <integer>0</integer> + <key>firstVisibleLine</key> + <integer>139</integer> + <key>selectFrom</key> + <dict> + <key>column</key> + <integer>0</integer> + <key>line</key> + <integer>150</integer> + </dict> + <key>selectTo</key> + <dict> + <key>column</key> + <integer>0</integer> + <key>line</key> + <integer>151</integer> + </dict> + </dict> </dict> + <key>openDocuments</key> + <array> + <string>speedup/direct-stream.rb</string> + <string>speedup/current.rb</string> + </array> <key>showFileHierarchyDrawer</key> <true/> <key>windowFrame</key> - <string>{{203, 5}, {1067, 773}}</string> + <string>{{161, 4}, {1119, 774}}</string> </dict> </plist> diff --git a/etc/speedup/current.rb b/etc/speedup/current.rb new file mode 100644 index 0000000..71acae6 --- /dev/null +++ b/etc/speedup/current.rb @@ -0,0 +1,132 @@ +require 'strscan' +require 'benchmark' + +class Scanner < StringScanner + + def initialize code + super code + @tokens = Tokens.new + end + + def tokenize + scan_tokens @tokens + @tokens + end + +protected + + def scan_tokens tokens + until eos? + if matched = scan(/\s+/) + tokens << [matched, :space] + elsif matched = scan(/!/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/=/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/%/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/\w+/) + tokens << [matched, :word] + elsif matched = scan(/[,.]/) + tokens << [matched, :op] + elsif scan(/\(/) + tokens << [:open, :par] + elsif scan(/\)/) + tokens << [:close, :par] + else + raise + end + end + end + +end + + +class Tokens < Array +end + + +class Encoder + + def encode_tokens tokens + @out = '' + compile tokens + @out + end + +protected + + if RUBY_VERSION >= '1.9' || defined?(JRUBY_VERSION) + def compile tokens + for text, kind in tokens + token text, kind + end + end + else + def compile tokens + tokens.each(&method(:token).to_proc) + end + end + + def token content, kind + encoded_token = + case content + when ::String + text_token content, kind + when :open + open kind + when :close + close kind + when ::Symbol + block_token content, kind + else + raise 'Unknown token content type: %p' % [content] + end + @out << encoded_token + end + + def text_token text, kind + if kind == :space + text + else + text.gsub!(/[)\\]/, '\\\\\0') # escape ) and \ + "#{kind}(#{text})" + end + end + + def block_token action, kind + case action + when :open + open kind + when :close + close kind + end + end + + def open kind + "#{kind}<" + end + + def close kind + '>' + end +end + +N = (10 ** (ARGV.first || 5).to_i) +code = " alpha, beta, (gamma).\n" * N +scanner = Scanner.new code +encoder = Encoder.new + +tokens = nil +time_scanning = Benchmark.realtime do + tokens = scanner.tokenize +end +puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [time_scanning, tokens.size / time_scanning / 1000] + +time_encoding = Benchmark.realtime do + out = encoder.encode_tokens(tokens).size +end +puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [time_encoding, tokens.size / time_encoding / 1000] + +time = time_scanning + time_encoding +puts 'Together: %0.2fs -- %0.0f kTok/s' % [time, tokens.size / time / 1000] diff --git a/etc/speedup/direct-stream.rb b/etc/speedup/direct-stream.rb new file mode 100644 index 0000000..3c15511 --- /dev/null +++ b/etc/speedup/direct-stream.rb @@ -0,0 +1,174 @@ +require 'strscan' +require 'benchmark' + +class Scanner < StringScanner + + def initialize code + super code + end + + def tokenize encoder = Tokens.new + scan_tokens encoder + encoder + end + +protected + + def scan_tokens encoder + until eos? + if matched = scan(/\s+/) + encoder.text_token matched, :space + elsif matched = scan(/!/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/=/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/%/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/\w+/) + encoder.text_token matched, :word + elsif matched = scan(/[,.]/) + encoder.text_token matched, :op + elsif scan(/\(/) + encoder.open :par + elsif scan(/\)/) + encoder.close :par + else + raise + end + end + end + +end + + +class Tokens < Array + alias token push + alias text_token push + alias block_token push + def open kind; push :open, kind end + def close kind; push :close, kind end +end + + +class Encoder + + def setup + @out = '' + @opened = [] + end + + def finish + while kind = @opened.pop + close kind + end + @out + end + + def encode_tokens tokens + setup + compile tokens + finish + end + + def encode_stream scanner + setup + scanner.tokenize self + finish + end + + def token content, kind + if content.is_a? ::String + text_token content, kind + elsif content.is_a? ::Symbol + block_token content, kind + else + raise 'Unknown token content type: %p' % [content] + end + end + + def text_token text, kind + @out << + if kind == :space + text + else + text.gsub!(/[)\\]/, '\\\\\0') # escape ) and \ + "#{kind}(#{text})" + end + end + + def block_token action, kind + case action + when :open + open kind + when :close + close kind + else + raise + end + end + + def open kind + @opened << kind + @out << "#{kind}<" + end + + def close kind + @opened.pop + @out << '>' + end + +protected + + def compile tokens + content = nil + for item in tokens + if content + case content + when ::String + text_token content, item + content = nil + when :open + open item + content = nil + when :close + close item + content = nil + when ::Symbol + block_token content, kind + content = nil + else + raise + end + else + content = item + end + end + raise if content + end + +end + +N = (10 ** (ARGV.first || 5).to_i) +code = " alpha, beta, (gamma).\n" * N +scanner = Scanner.new code +encoder = Encoder.new + +tokens = nil +time_scanning = Benchmark.realtime do + tokens = scanner.tokenize +end +puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [time_scanning, tokens.size / 2 / time_scanning / 1000] + +time_encoding = Benchmark.realtime do + encoder.encode_tokens tokens +end +puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [time_encoding, tokens.size / 2 / time_encoding / 1000] + +time = time_scanning + time_encoding +puts 'Together: %0.2fs -- %0.0f kTok/s' % [time, tokens.size / 2 / time / 1000] + +scanner.reset +time = Benchmark.realtime do + encoder.encode_stream scanner +end +puts 'Scanning + Encoding: %0.2fs -- %0.0f kTok/s' % [time, (N * 11 + 1) / time / 1000] |