Reduce the amount of data translation by having the tracers record data in a form closer to how it will be consumed. This should also reduce the amount of work the tracers have to do.

author: Ned Batchelder <ned@nedbatchelder.com> 2009-10-11 20:17:24 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2009-10-11 20:17:24 -0400
commit: d93e5d5da230876e946aa94f59e706d3b798c62b (patch)
tree: e243592dfb6c30e766ae4363472d788effd00583
parent: 064f6f18ea75af5f28a9adf959e902f6c3010bb6 (diff)
download: python-coveragepy-git-d93e5d5da230876e946aa94f59e706d3b798c62b.tar.gz
4 files changed, 54 insertions, 56 deletions
diff --git a/coverage/collector.py b/coverage/collector.py
index 8cb72dcd..4743ad52 100644
--- a/coverage/collector.py
+++ b/coverage/collector.py
@@ -33,9 +33,9 @@ class PyTracer:
         self.data = None
         self.should_trace = None
         self.should_trace_cache = None
-        self.cur_filename = None
+        self.cur_file_data = None
         self.last_line = 0
-        self.filename_stack = []
+        self.data_stack = []
         self.last_exc_back = None
         self.arcs = False
 
@@ -48,32 +48,37 @@ class PyTracer:
         if self.last_exc_back:
             if frame == self.last_exc_back:
                 # Someone forgot a return event.
-                if self.arcs and self.cur_filename:
-                    self.data[(self.cur_filename, self.last_line, 0)] = True
-                self.cur_filename, self.last_line = self.filename_stack.pop()
+                if self.arcs and self.cur_file_data:
+                    self.cur_file_data[(self.last_line, 0)] = True
+                self.cur_file_data, self.last_line = self.data_stack.pop()
             self.last_exc_back = None
             
         if event == 'call':
             # Entering a new function context.  Decide if we should trace
             # in this file.
-            self.filename_stack.append((self.cur_filename, self.last_line))
+            self.data_stack.append((self.cur_file_data, self.last_line))
             filename = frame.f_code.co_filename
             tracename = self.should_trace(filename, frame)
-            self.cur_filename = tracename
+            if tracename:
+                if tracename not in self.data:
+                    self.data[tracename] = {}
+                self.cur_file_data = self.data[tracename]
+            else:
+                self.cur_file_data = None
             self.last_line = 0
         elif event == 'line':
             # Record an executed line.
-            if self.cur_filename:
+            if self.cur_file_data is not None:
                 if self.arcs:
-                    self.data[(self.cur_filename, self.last_line, frame.f_lineno)] = True
+                    self.cur_file_data[(self.last_line, frame.f_lineno)] = True
                 else:
-                    self.data[(self.cur_filename, frame.f_lineno)] = True
+                    self.cur_file_data[frame.f_lineno] = True
             self.last_line = frame.f_lineno
         elif event == 'return':
-            if self.arcs and self.cur_filename:
-                self.data[(self.cur_filename, self.last_line, 0)] = True
+            if self.arcs and self.cur_file_data:
+                self.cur_file_data[(self.last_line, 0)] = True
             # Leaving this function, pop the filename stack.
-            self.cur_filename, self.last_line = self.filename_stack.pop()
+            self.cur_file_data, self.last_line = self.data_stack.pop()
         elif event == 'exception':
             self.last_exc_back = frame.f_back
         return self._trace
@@ -141,8 +146,8 @@ class Collector:
 
     def reset(self):
         """Clear collected data, and prepare to collect more."""
-        # A dictionary with an entry for (Python source file name, line number
-        # in that file) if that line has been executed. TODO
+        # A dictionary mapping filenames to dicts with linenumber keys,
+        # or mapping filenames to dicts with linenumber pairs as keys.
         self.data = {}
         
         # A cache of the results from should_trace, the decision about whether
@@ -157,9 +162,9 @@ class Collector:
         """Start a new Tracer object, and store it in self.tracers."""
         tracer = self._trace_class()
         tracer.data = self.data
+        tracer.arcs = self.branch
         tracer.should_trace = self.should_trace
         tracer.should_trace_cache = self.should_trace_cache
-        tracer.arcs = self.branch
         tracer.start()
         self.tracers.append(tracer)
 
@@ -216,11 +221,11 @@ class Collector:
         threading.settrace(self._installation_trace)
 
     def get_line_data(self):
-        """Return the (filename, lineno) pairs collected."""
+        """Return the { filename: { lineno: True, ...}, ...} data collected."""
         if self.branch:
             return [(f,l) for f,l,_ in self.data.keys() if l]
         else:
-            return self.data.keys()
+            return self.data
 
     def get_arc_data(self):
         """Return the (filename, (from_line, to_line)) arc data collected."""
diff --git a/coverage/data.py b/coverage/data.py
index 28925f54..fd6256e1 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -147,14 +147,14 @@ class CoverageData:
                 for filename, file_data in new_lines.items():
                     self.lines.setdefault(filename, {}).update(file_data)
 
-    def add_line_data(self, data_points):
+    def add_line_data(self, line_data):
         """Add executed line data.
         
-        `data_points` is (filename, lineno) pairs.
+        `line_data` is { filename: { lineno: True, ... }, ...}
         
         """
-        for filename, lineno in data_points:
-            self.lines.setdefault(filename, {})[lineno] = True
+        for filename, linenos in line_data.items():
+            self.lines.setdefault(filename, {}).update(linenos)
 
     def add_arc_data(self, arc_data):
         for filename, arc in arc_data:
diff --git a/coverage/tracer.c b/coverage/tracer.c
index f52d66db..428df4d3 100644
--- a/coverage/tracer.c
+++ b/coverage/tracer.c
@@ -43,17 +43,17 @@ typedef struct {
     PyObject * should_trace_cache;
     PyObject * arcs;
     int started;
-    /* The index of the last-used entry in tracenames. */
+    /* The index of the last-used entry in data_stack. */
     int depth;
     /* Filenames to record at each level, or NULL if not recording. */
-    PyObject ** tracenames;     /* PyMem_Malloc'ed. */
-    int tracenames_alloc;       /* number of entries at tracenames. */
+    PyObject ** data_stack;     /* PyMem_Malloc'ed, each PyObject* is a borrowed ref. */
+    int data_stack_alloc;       /* number of entries at data_stack. */
     
     /* The parent frame for the last exception event, to fix missing returns. */
     PyFrameObject * last_exc_back;
 } Tracer;
 
-#define TRACENAMES_DELTA    100
+#define STACK_DELTA    100
 
 static int
 Tracer_init(Tracer *self, PyObject *args, PyObject *kwds)
@@ -63,11 +63,11 @@ Tracer_init(Tracer *self, PyObject *args, PyObject *kwds)
     self->should_trace_cache = NULL;
     self->started = 0;
     self->depth = -1;
-    self->tracenames = PyMem_Malloc(TRACENAMES_DELTA*sizeof(PyObject *));
-    if (self->tracenames == NULL) {
+    self->data_stack = PyMem_Malloc(STACK_DELTA*sizeof(PyObject *));
+    if (self->data_stack == NULL) {
         return -1;
     }
-    self->tracenames_alloc = TRACENAMES_DELTA;
+    self->data_stack_alloc = STACK_DELTA;
     self->last_exc_back = NULL;
     return 0;
 }
@@ -83,12 +83,7 @@ Tracer_dealloc(Tracer *self)
     Py_XDECREF(self->data);
     Py_XDECREF(self->should_trace_cache);
 
-    while (self->depth >= 0) {
-        Py_XDECREF(self->tracenames[self->depth]);
-        self->depth--;
-    }
-    
-    PyMem_Free(self->tracenames);
+    PyMem_Free(self->data_stack);
 
     Py_TYPE(self)->tp_free((PyObject*)self);
 }
@@ -174,7 +169,6 @@ Tracer_trace(Tracer *self, PyFrameObject *frame, int what, PyObject *arg)
             */
             if (self->depth >= 0) {
                 SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "missedreturn");
-                Py_XDECREF(self->tracenames[self->depth]);
                 self->depth--;
             }
         }
@@ -185,16 +179,16 @@ Tracer_trace(Tracer *self, PyFrameObject *frame, int what, PyObject *arg)
     switch (what) {
     case PyTrace_CALL:      /* 0 */
         self->depth++;
-        if (self->depth >= self->tracenames_alloc) {
-            /* We've outgrown our tracenames array: make it bigger. */
-            int bigger = self->tracenames_alloc + TRACENAMES_DELTA;
-            PyObject ** bigger_tracenames = PyMem_Realloc(self->tracenames, bigger * sizeof(PyObject *));
-            if (bigger_tracenames == NULL) {
+        if (self->depth >= self->data_stack_alloc) {
+            /* We've outgrown our data_stack array: make it bigger. */
+            int bigger = self->data_stack_alloc + STACK_DELTA;
+            PyObject ** bigger_data_stack = PyMem_Realloc(self->data_stack, bigger * sizeof(PyObject *));
+            if (bigger_data_stack == NULL) {
                 self->depth--;
                 return -1;
             }
-            self->tracenames = bigger_tracenames;
-            self->tracenames_alloc = bigger;
+            self->data_stack = bigger_data_stack;
+            self->data_stack_alloc = bigger;
         }
         /* Check if we should trace this line. */
         filename = frame->f_code->co_filename;
@@ -217,11 +211,17 @@ Tracer_trace(Tracer *self, PyFrameObject *frame, int what, PyObject *arg)
 
         /* If tracename is a string, then we're supposed to trace. */
         if (MyText_Check(tracename)) {
-            self->tracenames[self->depth] = tracename;
+            PyObject * file_data = PyDict_GetItem(self->data, tracename);
+            if (file_data == NULL) {
+                file_data = PyDict_New();
+                PyDict_SetItem(self->data, tracename, file_data);
+                Py_DECREF(file_data);
+            }
+            self->data_stack[self->depth] = file_data;
             SHOWLOG(self->depth, frame->f_lineno, filename, "traced");
         }
         else {
-            self->tracenames[self->depth] = NULL;
+            self->data_stack[self->depth] = NULL;
             Py_DECREF(tracename);
             SHOWLOG(self->depth, frame->f_lineno, filename, "skipped");
         }
@@ -231,7 +231,6 @@ Tracer_trace(Tracer *self, PyFrameObject *frame, int what, PyObject *arg)
         /* A near-copy of this code is above in the missing-return handler. */
         if (self->depth >= 0) {
             SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "return");
-            Py_XDECREF(self->tracenames[self->depth]);
             self->depth--;
         }
         break;
@@ -239,14 +238,8 @@ Tracer_trace(Tracer *self, PyFrameObject *frame, int what, PyObject *arg)
     case PyTrace_LINE:      /* 2 */
         if (self->depth >= 0) {
             SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "line");
-            if (self->tracenames[self->depth]) {
-                PyObject * t = PyTuple_New(2);
-                tracename = self->tracenames[self->depth];
-                Py_INCREF(tracename);
-                PyTuple_SET_ITEM(t, 0, tracename);
-                PyTuple_SET_ITEM(t, 1, MyInt_FromLong(frame->f_lineno));
-                PyDict_SetItem(self->data, t, Py_None);
-                Py_DECREF(t);
+            if (self->data_stack[self->depth]) {
+                PyDict_SetItem(self->data_stack[self->depth], MyInt_FromLong(frame->f_lineno), Py_None);
             }
         }
         break;
diff --git a/test/test_data.py b/test/test_data.py
index a2557a91..12dd80aa 100644
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -9,13 +9,13 @@ sys.path.insert(0, os.path.split(__file__)[0]) # Force relative import for Py3k
 from coveragetest import CoverageTest
 
 
-DATA_1 = [ ('a.py',1), ('a.py',2), ('b.py',3) ]
+DATA_1 = { 'a.py': {1:None, 2:None}, 'b.py': {3:None} }
 SUMMARY_1 = { 'a.py':2, 'b.py':1 }
 EXECED_FILES_1 = [ 'a.py', 'b.py' ]
 A_PY_LINES_1 = [1,2]
 B_PY_LINES_1 = [3]
 
-DATA_2 = [ ('a.py',1), ('a.py',5), ('c.py',17) ]
+DATA_2 = { 'a.py': {1:None, 5:None}, 'c.py': {17:None} }
 SUMMARY_1_2 = { 'a.py':3, 'b.py':1, 'c.py':1 }
 EXECED_FILES_1_2 = [ 'a.py', 'b.py', 'c.py' ]
author	Ned Batchelder <ned@nedbatchelder.com>	2009-10-11 20:17:24 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2009-10-11 20:17:24 -0400
commit	d93e5d5da230876e946aa94f59e706d3b798c62b (patch)
tree	e243592dfb6c30e766ae4363472d788effd00583
parent	064f6f18ea75af5f28a9adf959e902f6c3010bb6 (diff)
download	python-coveragepy-git-d93e5d5da230876e946aa94f59e706d3b798c62b.tar.gz