summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--coverage/numbits.py144
-rw-r--r--coverage/sqldata.py75
-rw-r--r--tests/test_numbits.py115
3 files changed, 262 insertions, 72 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py
index abe40f41..4b340c8e 100644
--- a/coverage/numbits.py
+++ b/coverage/numbits.py
@@ -5,29 +5,62 @@
Functions to manipulate packed binary representations of number sets.
To save space, coverage stores sets of line numbers in SQLite using a packed
-binary representation called a numbits. A numbits is stored as a blob in the
-database. The exact meaning of the bytes in the blobs should be considered an
-implementation detail that might change in the future. Use these functions to
-work with those binary blobs of data.
+binary representation called a numbits. A numbits is a set of positive
+integers.
+
+A numbits is stored as a blob in the database. The exact meaning of the bytes
+in the blobs should be considered an implementation detail that might change in
+the future. Use these functions to work with those binary blobs of data.
"""
+from coverage import env
from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest
-from coverage.misc import contract
+from coverage.misc import contract, new_contract
+
+if env.PY3:
+ def _to_blob(b):
+ """Convert a bytestring into a type SQLite will accept for a blob."""
+ return b
+ new_contract('blob', lambda v: isinstance(v, bytes))
+else:
+ def _to_blob(b):
+ """Convert a bytestring into a type SQLite will accept for a blob."""
+ return buffer(b) # pylint: disable=undefined-variable
-@contract(nums='Iterable', returns='bytes')
+ new_contract('blob', lambda v: isinstance(v, buffer)) # pylint: disable=undefined-variable
+
+@contract(nums='Iterable', returns='blob')
def nums_to_numbits(nums):
- """Convert `nums` (a non-empty iterable of ints) into a numbits."""
- nbytes = max(nums) // 8 + 1
+ """Convert `nums` into a numbits.
+
+ Arguments:
+ nums (a reusable iterable of integers): the line numbers to store.
+
+ Returns:
+ A binary blob.
+ """
+ try:
+ nbytes = max(nums) // 8 + 1
+ except ValueError:
+ # nums was empty.
+ return _to_blob(b'')
b = bytearray(nbytes)
for num in nums:
b[num//8] |= 1 << num % 8
- return bytes(b)
+ return _to_blob(bytes(b))
-@contract(numbits='bytes', returns='list[int]')
+@contract(numbits='blob', returns='list[int]')
def numbits_to_nums(numbits):
- """Convert a numbits into a list of numbers."""
+ """Convert a numbits into a list of numbers.
+
+ Arguments:
+ numbits (a binary blob): the packed number set.
+
+ Returns:
+ A list of integers.
+ """
nums = []
for byte_i, byte in enumerate(bytes_to_ints(numbits)):
for bit_i in range(8):
@@ -35,22 +68,95 @@ def numbits_to_nums(numbits):
nums.append(byte_i * 8 + bit_i)
return nums
-@contract(numbits1='bytes', numbits2='bytes', returns='bytes')
-def merge_numbits(numbits1, numbits2):
- """Merge two numbits"""
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_union(numbits1, numbits2):
+ """Compute the union of two numbits.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A new numbits, the union of the two number sets.
+ """
+ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
+ return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs))
+
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_intersection(numbits1, numbits2):
+ """Compute the intersection of two numbits.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A new numbits, the intersection of the two number sets.
+ """
byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
- return binary_bytes(b1 | b2 for b1, b2 in byte_pairs)
+ intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs)
+ return _to_blob(intersection_bytes.rstrip(b'\0'))
-@contract(numbits1='bytes', numbits2='bytes', returns='bool')
+@contract(numbits1='blob', numbits2='blob', returns='bool')
def numbits_any_intersection(numbits1, numbits2):
- """Is there any number that appears in both numbits?"""
+ """Is there any number that appears in both numbits?
+
+ Determine whether two number sets have a non-empty intersection. This is
+ faster than computing the intersection.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A boolean, true if there is any number in both of the number sets.
+ """
byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
return any(b1 & b2 for b1, b2 in byte_pairs)
-@contract(num='int', numbits='bytes', returns='bool')
+@contract(num='int', numbits='blob', returns='bool')
def num_in_numbits(num, numbits):
- """Does the integer `num` appear in `numbits`?"""
+ """Does the integer `num` appear in `numbits`?
+
+ Arguments:
+ num (integer)
+
+ numbits (binary blob)
+
+ Returns:
+ A boolean, true if `num` is a member of `numbits`.
+ """
nbyte, nbit = divmod(num, 8)
if nbyte >= len(numbits):
return False
return bool(byte_to_int(numbits[nbyte]) & (1 << nbit))
+
+def register_sqlite_functions(connection):
+ """
+ Define numbits functions in a SQLite connection.
+
+ This defines these functions for use in SQLite statements:
+
+ * :func:`numbits_union`
+ * :func:`numbits_intersection`
+ * :func:`numbits_any_intersection`
+ * :func:`num_in_numbits`
+
+ `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>`
+ object. After creating the connection, pass it to this function to
+ register the numbits functions. Then you can use numbits functions in your
+ queries::
+
+ import sqlite3
+ from coverage.numbits import register_sqlite_functions
+
+ conn = sqlite3.connect('example.db')
+ register_sqlite_functions(conn)
+ c = conn.cursor()
+ c.execute(
+ "select lb.file_id, lb.context_id from line_bits lb"
+ "where num_in_numbits(?, lb.numbits)",
+ (interesting_line_number,)
+ )
+ """
+ connection.create_function("numbits_union", 2, numbits_union)
+ connection.create_function("numbits_intersection", 2, numbits_intersection)
+ connection.create_function("numbits_any_intersection", 2, numbits_any_intersection)
+ connection.create_function("num_in_numbits", 2, num_in_numbits)
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 3ee34f0f..5e7edd72 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -20,16 +20,18 @@ import zlib
from coverage.backward import get_thread_id, iitems, to_bytes, to_string
from coverage.debug import NoDebugging, SimpleReprMixin
-from coverage import env
from coverage.files import PathAliases
from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module
from coverage.misc import contract
-from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits
+from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union
from coverage.version import __version__
os = isolate_module(os)
-SCHEMA_VERSION = 6
+# If you change the schema, increment the SCHEMA_VERSION, and update the
+# docs in docs/dbschema.rst also.
+
+SCHEMA_VERSION = 7
# Schema versions:
# 1: Released in 5.0a2
@@ -38,6 +40,7 @@ SCHEMA_VERSION = 6
# 4: Changed line_map.bitmap to line_map.numbits.
# 5: Added foreign key declarations.
# 6: Key-value in meta.
+# 7: line_map -> line_bits
SCHEMA = """
CREATE TABLE coverage_schema (
@@ -71,8 +74,9 @@ CREATE TABLE context (
unique (context)
);
-CREATE TABLE line_map (
- -- If recording lines, a row per context per line executed.
+CREATE TABLE line_bits (
+ -- If recording lines, a row per context per file executed.
+ -- All of the line numbers for that file/context are in one numbits.
file_id integer, -- foreign key to `file`.
context_id integer, -- foreign key to `context`.
numbits blob, -- see the numbits functions in coverage.numbits
@@ -100,24 +104,6 @@ CREATE TABLE tracer (
);
"""
-if env.PY2:
- def to_blob(b):
- """Convert a bytestring into a type SQLite will accept for a blob."""
- return buffer(b) # pylint: disable=undefined-variable
-
- def from_blob(blob):
- """Convert a blob read from SQLite into a bytestring."""
- return bytes(blob)
-else:
- def to_blob(b):
- """Convert a bytestring into a type SQLite will accept for a blob."""
- return b
-
- def from_blob(blob):
- """Convert a blob read from SQLite into a bytestring."""
- return blob
-
-
class CoverageData(SimpleReprMixin):
"""Manages collected coverage data, including file storage.
@@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin):
for filename, linenos in iitems(line_data):
linemap = nums_to_numbits(linenos)
file_id = self._file_id(filename, add=True)
- query = "select numbits from line_map where file_id = ? and context_id = ?"
+ query = "select numbits from line_bits where file_id = ? and context_id = ?"
existing = list(con.execute(query, (file_id, self._current_context_id)))
if existing:
- linemap = merge_numbits(linemap, from_blob(existing[0][0]))
+ linemap = numbits_union(linemap, existing[0][0])
con.execute(
- "insert or replace into line_map "
+ "insert or replace into line_bits "
" (file_id, context_id, numbits) values (?, ?, ?)",
- (file_id, self._current_context_id, to_blob(linemap)),
+ (file_id, self._current_context_id, linemap),
)
def add_arcs(self, arc_data):
@@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin):
# Get line data.
cur = conn.execute(
- 'select file.path, context.context, line_map.numbits '
- 'from line_map '
- 'inner join file on file.id = line_map.file_id '
- 'inner join context on context.id = line_map.context_id'
+ 'select file.path, context.context, line_bits.numbits '
+ 'from line_bits '
+ 'inner join file on file.id = line_bits.file_id '
+ 'inner join context on context.id = line_bits.context_id'
)
lines = {
- (files[path], context): from_blob(numbits)
+ (files[path], context): numbits
for (path, context, numbits) in cur
}
cur.close()
@@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin):
# Get line data.
cur = conn.execute(
- 'select file.path, context.context, line_map.numbits '
- 'from line_map '
- 'inner join file on file.id = line_map.file_id '
- 'inner join context on context.id = line_map.context_id'
+ 'select file.path, context.context, line_bits.numbits '
+ 'from line_bits '
+ 'inner join file on file.id = line_bits.file_id '
+ 'inner join context on context.id = line_bits.context_id'
)
for path, context, numbits in cur:
key = (aliases.map(path), context)
- numbits = from_blob(numbits)
if key in lines:
- numbits = merge_numbits(lines[key], numbits)
+ numbits = numbits_union(lines[key], numbits)
lines[key] = numbits
cur.close()
@@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin):
'(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
arc_rows
)
- conn.execute("delete from line_map")
+ conn.execute("delete from line_bits")
conn.executemany(
- "insert into line_map "
+ "insert into line_bits "
"(file_id, context_id, numbits) values (?, ?, ?)",
[
- (file_ids[file], context_ids[context], to_blob(numbits))
+ (file_ids[file], context_ids[context], numbits)
for (file, context), numbits in lines.items()
]
)
@@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin):
if file_id is None:
return None
else:
- query = "select numbits from line_map where file_id = ?"
+ query = "select numbits from line_bits where file_id = ?"
data = [file_id]
context_ids = self._get_query_context_ids(contexts)
if context_ids is not None:
@@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin):
bitmaps = list(con.execute(query, data))
nums = set()
for row in bitmaps:
- nums.update(numbits_to_nums(from_blob(row[0])))
+ nums.update(numbits_to_nums(row[0]))
return sorted(nums)
def arcs(self, filename, contexts=None):
@@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin):
lineno_contexts_map[tono].append(context)
else:
query = (
- "select l.numbits, c.context from line_map l, context c "
+ "select l.numbits, c.context from line_bits l, context c "
"where l.context_id = c.id "
"and file_id = ?"
)
@@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin):
query += " and l.context_id in (" + ids_array + ")"
data += context_ids
for numbits, context in con.execute(query, data):
- for lineno in numbits_to_nums(from_blob(numbits)):
+ for lineno in numbits_to_nums(numbits):
lineno_contexts_map[lineno].append(context)
return lineno_contexts_map
diff --git a/tests/test_numbits.py b/tests/test_numbits.py
index a556869b..eb094d2a 100644
--- a/tests/test_numbits.py
+++ b/tests/test_numbits.py
@@ -3,20 +3,23 @@
"""Tests for coverage.numbits"""
+import sqlite3
+
from hypothesis import example, given, settings
from hypothesis.strategies import sets, integers
from coverage import env
+from coverage.backward import byte_to_int
from coverage.numbits import (
- nums_to_numbits, numbits_to_nums, merge_numbits, numbits_any_intersection,
- num_in_numbits,
+ nums_to_numbits, numbits_to_nums, numbits_union, numbits_intersection,
+ numbits_any_intersection, num_in_numbits, register_sqlite_functions,
)
from tests.coveragetest import CoverageTest
# Hypothesis-generated line number data
line_numbers = integers(min_value=1, max_value=9999)
-line_number_sets = sets(line_numbers, min_size=1)
+line_number_sets = sets(line_numbers)
# When coverage-testing ourselves, hypothesis complains about a test being
# flaky because the first run exceeds the deadline (and fails), and the second
@@ -26,6 +29,12 @@ if env.METACOV:
default_settings = settings(default_settings, deadline=None)
+def good_numbits(numbits):
+ """Assert that numbits is good."""
+ # It shouldn't end with a zero byte, that should have been trimmed off.
+ assert (not numbits) or (byte_to_int(numbits[-1]) != 0)
+
+
class NumbitsOpTest(CoverageTest):
"""Tests of the numbits operations in numbits.py."""
@@ -34,19 +43,43 @@ class NumbitsOpTest(CoverageTest):
@given(line_number_sets)
@settings(default_settings)
def test_conversion(self, nums):
- nums2 = numbits_to_nums(nums_to_numbits(nums))
+ numbits = nums_to_numbits(nums)
+ good_numbits(numbits)
+ nums2 = numbits_to_nums(numbits)
self.assertEqual(nums, set(nums2))
@given(line_number_sets, line_number_sets)
@settings(default_settings)
- def test_merging(self, nums1, nums2):
- merged = numbits_to_nums(merge_numbits(nums_to_numbits(nums1), nums_to_numbits(nums2)))
- self.assertEqual(nums1 | nums2, set(merged))
+ def test_union(self, nums1, nums2):
+ nb1 = nums_to_numbits(nums1)
+ good_numbits(nb1)
+ nb2 = nums_to_numbits(nums2)
+ good_numbits(nb2)
+ nbu = numbits_union(nb1, nb2)
+ good_numbits(nbu)
+ union = numbits_to_nums(nbu)
+ self.assertEqual(nums1 | nums2, set(union))
+
+ @given(line_number_sets, line_number_sets)
+ @settings(default_settings)
+ def test_intersection(self, nums1, nums2):
+ nb1 = nums_to_numbits(nums1)
+ good_numbits(nb1)
+ nb2 = nums_to_numbits(nums2)
+ good_numbits(nb2)
+ nbi = numbits_intersection(nb1, nb2)
+ good_numbits(nbi)
+ intersection = numbits_to_nums(nbi)
+ self.assertEqual(nums1 & nums2, set(intersection))
@given(line_number_sets, line_number_sets)
@settings(default_settings)
def test_any_intersection(self, nums1, nums2):
- inter = numbits_any_intersection(nums_to_numbits(nums1), nums_to_numbits(nums2))
+ nb1 = nums_to_numbits(nums1)
+ good_numbits(nb1)
+ nb2 = nums_to_numbits(nums2)
+ good_numbits(nb2)
+ inter = numbits_any_intersection(nb1, nb2)
expect = bool(nums1 & nums2)
self.assertEqual(expect, bool(inter))
@@ -55,5 +88,71 @@ class NumbitsOpTest(CoverageTest):
@example(152, {144})
def test_num_in_numbits(self, num, nums):
numbits = nums_to_numbits(nums)
+ good_numbits(numbits)
is_in = num_in_numbits(num, numbits)
self.assertEqual(num in nums, is_in)
+
+
+class NumbitsSqliteFunctionTest(CoverageTest):
+ """Tests of the SQLite integration for numbits functions."""
+
+ run_in_temp_dir = False
+
+ def setUp(self):
+ super(NumbitsSqliteFunctionTest, self).setUp()
+ conn = sqlite3.connect(":memory:")
+ register_sqlite_functions(conn)
+ self.cursor = conn.cursor()
+ self.cursor.execute("create table data (id int, numbits blob)")
+ self.cursor.executemany(
+ "insert into data (id, numbits) values (?, ?)",
+ [
+ (i, nums_to_numbits(range(i, 100, i)))
+ for i in range(1, 11)
+ ]
+ )
+ self.addCleanup(self.cursor.close)
+
+ def test_numbits_union(self):
+ res = self.cursor.execute(
+ "select numbits_union("
+ "(select numbits from data where id = 7),"
+ "(select numbits from data where id = 9)"
+ ")"
+ )
+ answer = numbits_to_nums(list(res)[0][0])
+ self.assertEqual(
+ [7, 9, 14, 18, 21, 27, 28, 35, 36, 42, 45, 49,
+ 54, 56, 63, 70, 72, 77, 81, 84, 90, 91, 98, 99],
+ answer
+ )
+
+ def test_numbits_intersection(self):
+ res = self.cursor.execute(
+ "select numbits_intersection("
+ "(select numbits from data where id = 7),"
+ "(select numbits from data where id = 9)"
+ ")"
+ )
+ answer = numbits_to_nums(list(res)[0][0])
+ self.assertEqual([63], answer)
+
+ def test_numbits_any_intersection(self):
+ res = self.cursor.execute(
+ "select numbits_any_intersection(?, ?)",
+ (nums_to_numbits([1, 2, 3]), nums_to_numbits([3, 4, 5]))
+ )
+ answer = [any_inter for (any_inter,) in res]
+ self.assertEqual([1], answer)
+
+ res = self.cursor.execute(
+ "select numbits_any_intersection(?, ?)",
+ (nums_to_numbits([1, 2, 3]), nums_to_numbits([7, 8, 9]))
+ )
+ answer = [any_inter for (any_inter,) in res]
+ self.assertEqual([0], answer)
+
+ def test_num_in_numbits(self):
+ res = self.cursor.execute("select id, num_in_numbits(12, numbits) from data order by id")
+ answer = [is_in for (id, is_in) in res]
+ self.assertEqual([1, 1, 1, 1, 0, 1, 0, 0, 0, 0], answer)