diff options
author | Travis Oliphant <oliphant@enthought.com> | 2005-09-26 20:20:16 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2005-09-26 20:20:16 +0000 |
commit | 45d01a4be1c4221132ba46d687e6af3a8df3329b (patch) | |
tree | ce3be5290e918def7c7187e747c5460193b0ca85 /scipy/weave/examples/md5_speed.py | |
parent | ccd1c3db37672627aa4fe0fdb5437f5dddc0fe86 (diff) | |
download | numpy-45d01a4be1c4221132ba46d687e6af3a8df3329b.tar.gz |
Moved weave
Diffstat (limited to 'scipy/weave/examples/md5_speed.py')
-rw-r--r-- | scipy/weave/examples/md5_speed.py | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/scipy/weave/examples/md5_speed.py b/scipy/weave/examples/md5_speed.py new file mode 100644 index 000000000..58edfbc81 --- /dev/null +++ b/scipy/weave/examples/md5_speed.py @@ -0,0 +1,89 @@ +""" +Storing actual strings instead of their md5 value appears to +be about 10 times faster. + +>>> md5_speed.run(200,50000) +md5 build(len,sec): 50000 0.870999932289 +md5 retrv(len,sec): 50000 0.680999994278 +std build(len,sec): 50000 0.259999990463 +std retrv(len,sec): 50000 0.0599999427795 + +This test actually takes several minutes to generate the random +keys used to populate the dictionaries. Here is a smaller run, +but with longer keys. + +>>> md5_speed.run(1000,4000) +md5 build(len,sec,per): 4000 0.129999995232 3.24999988079e-005 +md5 retrv(len,sec,per): 4000 0.129999995232 3.24999988079e-005 +std build(len,sec,per): 4000 0.0500000715256 1.25000178814e-005 +std retrv(len,sec,per): 4000 0.00999999046326 2.49999761581e-006 + +Results are similar, though not statistically to good because of +the short times used and the available clock resolution. + +Still, I think it is safe to say that, for speed, it is better +to store entire strings instead of using md5 versions of +their strings. Yeah, the expected result, but it never hurts +to check... + +""" +import random, md5, time, cStringIO + +def speed(n,m): + s = 'a'*n + t1 = time.time() + for i in range(m): + q= md5.new(s).digest() + t2 = time.time() + print (t2 - t1) / m + +#speed(50,1e6) + +def generate_random(avg_length,count): + all_str = [] + alphabet = 'abcdefghijklmnopqrstuvwxyz' + lo,hi = [30,avg_length*2+30] + for i in range(count): + new_str = cStringIO.StringIO() + l = random.randrange(lo,hi) + for i in range(l): + new_str.write(random.choice(alphabet)) + all_str.append(new_str.getvalue()) + return all_str + +def md5_dict(lst): + catalog = {} + t1 = time.time() + for s in lst: + key= md5.new(s).digest() + catalog[key] = None + t2 = time.time() + print 'md5 build(len,sec,per):', len(lst), t2 - t1, (t2-t1)/len(lst) + + t1 = time.time() + for s in lst: + key= md5.new(s).digest() + val = catalog[key] + t2 = time.time() + print 'md5 retrv(len,sec,per):', len(lst), t2 - t1, (t2-t1)/len(lst) + +def std_dict(lst): + catalog = {} + t1 = time.time() + for s in lst: + catalog[s] = None + t2 = time.time() + print 'std build(len,sec,per):', len(lst), t2 - t1, (t2-t1)/len(lst) + + t1 = time.time() + for s in lst: + val = catalog[s] + t2 = time.time() + print 'std retrv(len,sec,per):', len(lst), t2 - t1, (t2-t1)/len(lst) + +def run(m=200,n=10): + lst = generate_random(m,n) + md5_dict(lst) + std_dict(lst) + +run(2000,100)
\ No newline at end of file |