1 files changed, 247 insertions, 0 deletions
diff --git a/weave/examples/vq.py b/weave/examples/vq.py
new file mode 100644
index 000000000..74816b469
--- /dev/null
+++ b/weave/examples/vq.py
@@ -0,0 +1,247 @@
+""" 
+"""
+# C:\home\ej\wrk\scipy\compiler\examples>python vq.py
+# vq with 1000 observation, 10 features and 30 codes fo 100 iterations
+#  speed in python: 0.150119999647
+# [25 29] [ 2.49147266  3.83021032]
+#  speed in standard c: 0.00710999965668
+# [25 29] [ 2.49147266  3.83021032]
+#  speed up: 21.11
+#  speed inline/blitz: 0.0186300003529
+# [25 29] [ 2.49147272  3.83021021]
+#  speed up: 8.06
+#  speed inline/blitz2: 0.00461000084877
+# [25 29] [ 2.49147272  3.83021021]
+#  speed up: 32.56
+ 
+import Numeric
+from Numeric import *
+import sys
+sys.path.insert(0,'..')
+import inline_tools
+from blitz_tools import blitz_type_factories
+import scalar_spec
+
+def vq(obs,code_book):
+    # make sure we're looking at arrays.
+    obs = asarray(obs)
+    code_book = asarray(code_book)
+    # check for 2d arrays and compatible sizes.
+    obs_sh = shape(obs)
+    code_book_sh = shape(code_book)
+    assert(len(obs_sh) == 2 and len(code_book_sh) == 2)   
+    assert(obs_sh[1] == code_book_sh[1])   
+    type = scalar_spec.numeric_to_blitz_type_mapping[obs.typecode()]
+    # band aid for now.
+    ar_type = 'PyArray_FLOAT'
+    code =  """
+            #line 37 "vq.py"
+            // Use tensor notation.            
+            blitz::Array<%(type)s,2> dist_sq(_Ncode_book[0],_Nobs[0]);
+ 	        blitz::firstIndex i;    
+            blitz::secondIndex j;   
+            blitz::thirdIndex k;
+            dist_sq = sum(pow2(obs(j,k) - code_book(i,k)),k);
+            // Surely there is a better way to do this...
+            PyArrayObject* py_code = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_LONG);
+ 	        blitz::Array<int,1> code((int*)(py_code->data),
+                                     blitz::shape(_Nobs[0]), blitz::neverDeleteData);
+ 	        code = minIndex(dist_sq(j,i),j);
+ 	        
+ 	        PyArrayObject* py_min_dist = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_FLOAT);
+ 	        blitz::Array<float,1> min_dist((float*)(py_min_dist->data),
+ 	                                       blitz::shape(_Nobs[0]), blitz::neverDeleteData);
+ 	        min_dist = sqrt(min(dist_sq(j,i),j));
+ 	        Py::Tuple results(2);
+ 	        results[0] = Py::Object((PyObject*)py_code);
+ 	        results[1] = Py::Object((PyObject*)py_min_dist);
+ 	        return_val = Py::new_reference_to(results); 	        
+            """ % locals()
+    code, distortion = inline_tools.inline(code,['obs','code_book'],
+                                           type_factories = blitz_type_factories,
+                                           compiler = 'gcc',
+                                           verbose = 1)
+    return code, distortion
+
+def vq2(obs,code_book):
+    """ doesn't use blitz (except in conversion)
+        ALSO DOES NOT HANDLE STRIDED ARRAYS CORRECTLY
+    """
+    # make sure we're looking at arrays.
+    obs = asarray(obs)
+    code_book = asarray(code_book)
+    # check for 2d arrays and compatible sizes.
+    obs_sh = shape(obs)
+    code_book_sh = shape(code_book)
+    assert(len(obs_sh) == 2 and len(code_book_sh) == 2)   
+    assert(obs_sh[1] == code_book_sh[1])   
+    assert(obs.typecode() == code_book.typecode())   
+    type = scalar_spec.numeric_to_blitz_type_mapping[obs.typecode()]
+    # band aid for now.
+    ar_type = 'PyArray_FLOAT'
+    code =  """
+            #line 83 "vq.py"
+            // THIS DOES NOT HANDLE STRIDED ARRAYS CORRECTLY
+            // Surely there is a better way to do this...
+            PyArrayObject* py_code = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_LONG);	        
+ 	        PyArrayObject* py_min_dist = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_FLOAT);
+ 	        
+            int* raw_code = (int*)(py_code->data);
+            float* raw_min_dist = (float*)(py_min_dist->data);
+            %(type)s* raw_obs = obs.data();
+            %(type)s* raw_code_book = code_book.data(); 
+            %(type)s* this_obs = NULL;
+            %(type)s* this_code = NULL; 
+            int Nfeatures = _Nobs[1];
+            float diff,dist;
+            for(int i=0; i < _Nobs[0]; i++)
+            {
+                this_obs = &raw_obs[i*Nfeatures];
+                raw_min_dist[i] = (%(type)s)10000000.; // big number
+                for(int j=0; j < _Ncode_book[0]; j++)
+                {
+                    this_code = &raw_code_book[j*Nfeatures];
+                    dist = 0;
+                    for(int k=0; k < Nfeatures; k++)
+                    {
+                        diff = this_obs[k] - this_code[k];
+                        dist +=  diff*diff;
+                    }
+                    dist = dist;
+                    if (dist < raw_min_dist[i])
+                    {
+                        raw_code[i] = j;
+                        raw_min_dist[i] = dist;                           
+                    }    
+                }
+                raw_min_dist[i] = sqrt(raw_min_dist[i]);
+ 	        }
+ 	        Py::Tuple results(2);
+ 	        results[0] = Py::Object((PyObject*)py_code);
+ 	        results[1] = Py::Object((PyObject*)py_min_dist);
+ 	        return_val = Py::new_reference_to(results); 	        
+            """ % locals()
+    code, distortion = inline_tools.inline(code,['obs','code_book'],
+                                         type_factories = blitz_type_factories,
+                                         compiler = 'gcc',
+                                         verbose = 1)
+    return code, distortion
+
+from standard_array_spec import standard_array_factories
+
+def vq3(obs,code_book):
+    """ Uses standard array conversion completely bi-passing blitz.
+        THIS DOES NOT HANDLE STRIDED ARRAYS CORRECTLY
+    """
+    # make sure we're looking at arrays.
+    obs = asarray(obs)
+    code_book = asarray(code_book)
+    # check for 2d arrays and compatible sizes.
+    obs_sh = shape(obs)
+    code_book_sh = shape(code_book)
+    assert(len(obs_sh) == 2 and len(code_book_sh) == 2)   
+    assert(obs_sh[1] == code_book_sh[1])   
+    assert(obs.typecode() == code_book.typecode())   
+    type = scalar_spec.numeric_to_blitz_type_mapping[obs.typecode()]
+    code =  """
+            #line 139 "vq.py"
+            // Surely there is a better way to do this...
+            PyArrayObject* py_code = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_LONG);	        
+ 	        PyArrayObject* py_min_dist = (PyArrayObject*) PyArray_FromDims(1,&_Nobs[0],PyArray_FLOAT);
+ 	        
+            int* code_data = (int*)(py_code->data);
+            float* min_dist_data = (float*)(py_min_dist->data);
+            %(type)s* this_obs = NULL;
+            %(type)s* this_code = NULL; 
+            int Nfeatures = _Nobs[1];
+            float diff,dist;
+
+            for(int i=0; i < _Nobs[0]; i++)
+            {
+                this_obs = &obs_data[i*Nfeatures];
+                min_dist_data[i] = (float)10000000.; // big number
+                for(int j=0; j < _Ncode_book[0]; j++)
+                {
+                    this_code = &code_book_data[j*Nfeatures];
+                    dist = 0;
+                    for(int k=0; k < Nfeatures; k++)
+                    {
+                        diff = this_obs[k] - this_code[k];
+                        dist +=  diff*diff;
+                    }
+                    if (dist < min_dist_data[i])
+                    {
+                        code_data[i] = j;
+                        min_dist_data[i] = dist;                           
+                    }    
+                }
+                min_dist_data[i] = sqrt(min_dist_data[i]);
+ 	        }
+ 	        Py::Tuple results(2);
+ 	        results[0] = Py::Object((PyObject*)py_code);
+ 	        results[1] = Py::Object((PyObject*)py_min_dist);
+ 	        return Py::new_reference_to(results); 	        
+            """ % locals()
+    # this is an unpleasant way to specify type factories -- work on it.
+    import ext_tools
+    code, distortion = inline_tools.inline(code,['obs','code_book'])
+    return code, distortion
+
+import time
+import RandomArray
+def compare(m,Nobs,Ncodes,Nfeatures):
+    obs = RandomArray.normal(0.,1.,(Nobs,Nfeatures))
+    codes = RandomArray.normal(0.,1.,(Ncodes,Nfeatures))
+    import scipy.cluster.vq
+    scipy.cluster.vq
+    print 'vq with %d observation, %d features and %d codes for %d iterations' % \
+           (Nobs,Nfeatures,Ncodes,m)
+    t1 = time.time()
+    for i in range(m):
+        code,dist = scipy.cluster.vq.py_vq(obs,codes)
+    t2 = time.time()
+    py = (t2-t1)
+    print ' speed in python:', (t2 - t1)/m
+    print code[:2],dist[:2]
+    
+    t1 = time.time()
+    for i in range(m):
+        code,dist = scipy.cluster.vq.vq(obs,codes)
+    t2 = time.time()
+    print ' speed in standard c:', (t2 - t1)/m
+    print code[:2],dist[:2]
+    print ' speed up: %3.2f' % (py/(t2-t1))
+    
+    # load into cache    
+    b = vq(obs,codes)
+    t1 = time.time()
+    for i in range(m):
+        code,dist = vq(obs,codes)
+    t2 = time.time()
+    print ' speed inline/blitz:',(t2 - t1)/ m    
+    print code[:2],dist[:2]
+    print ' speed up: %3.2f' % (py/(t2-t1))
+
+    # load into cache    
+    b = vq2(obs,codes)
+    t1 = time.time()
+    for i in range(m):
+        code,dist = vq2(obs,codes)
+    t2 = time.time()
+    print ' speed inline/blitz2:',(t2 - t1)/ m    
+    print code[:2],dist[:2]
+    print ' speed up: %3.2f' % (py/(t2-t1))
+
+    # load into cache    
+    b = vq3(obs,codes)
+    t1 = time.time()
+    for i in range(m):
+        code,dist = vq3(obs,codes)
+    t2 = time.time()
+    print ' speed using C arrays:',(t2 - t1)/ m    
+    print code[:2],dist[:2]
+    print ' speed up: %3.2f' % (py/(t2-t1))
+    
+if __name__ == "__main__":
+    compare(100,1000,30,10)    
+    #compare(1,10,2,10)