diff options
-rw-r--r-- | coverage/numbits.py | 144 | ||||
-rw-r--r-- | coverage/sqldata.py | 75 | ||||
-rw-r--r-- | tests/test_numbits.py | 115 |
3 files changed, 262 insertions, 72 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py index abe40f41..4b340c8e 100644 --- a/coverage/numbits.py +++ b/coverage/numbits.py @@ -5,29 +5,62 @@ Functions to manipulate packed binary representations of number sets. To save space, coverage stores sets of line numbers in SQLite using a packed -binary representation called a numbits. A numbits is stored as a blob in the -database. The exact meaning of the bytes in the blobs should be considered an -implementation detail that might change in the future. Use these functions to -work with those binary blobs of data. +binary representation called a numbits. A numbits is a set of positive +integers. + +A numbits is stored as a blob in the database. The exact meaning of the bytes +in the blobs should be considered an implementation detail that might change in +the future. Use these functions to work with those binary blobs of data. """ +from coverage import env from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest -from coverage.misc import contract +from coverage.misc import contract, new_contract + +if env.PY3: + def _to_blob(b): + """Convert a bytestring into a type SQLite will accept for a blob.""" + return b + new_contract('blob', lambda v: isinstance(v, bytes)) +else: + def _to_blob(b): + """Convert a bytestring into a type SQLite will accept for a blob.""" + return buffer(b) # pylint: disable=undefined-variable -@contract(nums='Iterable', returns='bytes') + new_contract('blob', lambda v: isinstance(v, buffer)) # pylint: disable=undefined-variable + +@contract(nums='Iterable', returns='blob') def nums_to_numbits(nums): - """Convert `nums` (a non-empty iterable of ints) into a numbits.""" - nbytes = max(nums) // 8 + 1 + """Convert `nums` into a numbits. + + Arguments: + nums (a reusable iterable of integers): the line numbers to store. + + Returns: + A binary blob. + """ + try: + nbytes = max(nums) // 8 + 1 + except ValueError: + # nums was empty. + return _to_blob(b'') b = bytearray(nbytes) for num in nums: b[num//8] |= 1 << num % 8 - return bytes(b) + return _to_blob(bytes(b)) -@contract(numbits='bytes', returns='list[int]') +@contract(numbits='blob', returns='list[int]') def numbits_to_nums(numbits): - """Convert a numbits into a list of numbers.""" + """Convert a numbits into a list of numbers. + + Arguments: + numbits (a binary blob): the packed number set. + + Returns: + A list of integers. + """ nums = [] for byte_i, byte in enumerate(bytes_to_ints(numbits)): for bit_i in range(8): @@ -35,22 +68,95 @@ def numbits_to_nums(numbits): nums.append(byte_i * 8 + bit_i) return nums -@contract(numbits1='bytes', numbits2='bytes', returns='bytes') -def merge_numbits(numbits1, numbits2): - """Merge two numbits""" +@contract(numbits1='blob', numbits2='blob', returns='blob') +def numbits_union(numbits1, numbits2): + """Compute the union of two numbits. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A new numbits, the union of the two number sets. + """ + byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) + return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs)) + +@contract(numbits1='blob', numbits2='blob', returns='blob') +def numbits_intersection(numbits1, numbits2): + """Compute the intersection of two numbits. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A new numbits, the intersection of the two number sets. + """ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) - return binary_bytes(b1 | b2 for b1, b2 in byte_pairs) + intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs) + return _to_blob(intersection_bytes.rstrip(b'\0')) -@contract(numbits1='bytes', numbits2='bytes', returns='bool') +@contract(numbits1='blob', numbits2='blob', returns='bool') def numbits_any_intersection(numbits1, numbits2): - """Is there any number that appears in both numbits?""" + """Is there any number that appears in both numbits? + + Determine whether two number sets have a non-empty intersection. This is + faster than computing the intersection. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A boolean, true if there is any number in both of the number sets. + """ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) return any(b1 & b2 for b1, b2 in byte_pairs) -@contract(num='int', numbits='bytes', returns='bool') +@contract(num='int', numbits='blob', returns='bool') def num_in_numbits(num, numbits): - """Does the integer `num` appear in `numbits`?""" + """Does the integer `num` appear in `numbits`? + + Arguments: + num (integer) + + numbits (binary blob) + + Returns: + A boolean, true if `num` is a member of `numbits`. + """ nbyte, nbit = divmod(num, 8) if nbyte >= len(numbits): return False return bool(byte_to_int(numbits[nbyte]) & (1 << nbit)) + +def register_sqlite_functions(connection): + """ + Define numbits functions in a SQLite connection. + + This defines these functions for use in SQLite statements: + + * :func:`numbits_union` + * :func:`numbits_intersection` + * :func:`numbits_any_intersection` + * :func:`num_in_numbits` + + `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>` + object. After creating the connection, pass it to this function to + register the numbits functions. Then you can use numbits functions in your + queries:: + + import sqlite3 + from coverage.numbits import register_sqlite_functions + + conn = sqlite3.connect('example.db') + register_sqlite_functions(conn) + c = conn.cursor() + c.execute( + "select lb.file_id, lb.context_id from line_bits lb" + "where num_in_numbits(?, lb.numbits)", + (interesting_line_number,) + ) + """ + connection.create_function("numbits_union", 2, numbits_union) + connection.create_function("numbits_intersection", 2, numbits_intersection) + connection.create_function("numbits_any_intersection", 2, numbits_any_intersection) + connection.create_function("num_in_numbits", 2, num_in_numbits) diff --git a/coverage/sqldata.py b/coverage/sqldata.py index 3ee34f0f..5e7edd72 100644 --- a/coverage/sqldata.py +++ b/coverage/sqldata.py @@ -20,16 +20,18 @@ import zlib from coverage.backward import get_thread_id, iitems, to_bytes, to_string from coverage.debug import NoDebugging, SimpleReprMixin -from coverage import env from coverage.files import PathAliases from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module from coverage.misc import contract -from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits +from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union from coverage.version import __version__ os = isolate_module(os) -SCHEMA_VERSION = 6 +# If you change the schema, increment the SCHEMA_VERSION, and update the +# docs in docs/dbschema.rst also. + +SCHEMA_VERSION = 7 # Schema versions: # 1: Released in 5.0a2 @@ -38,6 +40,7 @@ SCHEMA_VERSION = 6 # 4: Changed line_map.bitmap to line_map.numbits. # 5: Added foreign key declarations. # 6: Key-value in meta. +# 7: line_map -> line_bits SCHEMA = """ CREATE TABLE coverage_schema ( @@ -71,8 +74,9 @@ CREATE TABLE context ( unique (context) ); -CREATE TABLE line_map ( - -- If recording lines, a row per context per line executed. +CREATE TABLE line_bits ( + -- If recording lines, a row per context per file executed. + -- All of the line numbers for that file/context are in one numbits. file_id integer, -- foreign key to `file`. context_id integer, -- foreign key to `context`. numbits blob, -- see the numbits functions in coverage.numbits @@ -100,24 +104,6 @@ CREATE TABLE tracer ( ); """ -if env.PY2: - def to_blob(b): - """Convert a bytestring into a type SQLite will accept for a blob.""" - return buffer(b) # pylint: disable=undefined-variable - - def from_blob(blob): - """Convert a blob read from SQLite into a bytestring.""" - return bytes(blob) -else: - def to_blob(b): - """Convert a bytestring into a type SQLite will accept for a blob.""" - return b - - def from_blob(blob): - """Convert a blob read from SQLite into a bytestring.""" - return blob - - class CoverageData(SimpleReprMixin): """Manages collected coverage data, including file storage. @@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin): for filename, linenos in iitems(line_data): linemap = nums_to_numbits(linenos) file_id = self._file_id(filename, add=True) - query = "select numbits from line_map where file_id = ? and context_id = ?" + query = "select numbits from line_bits where file_id = ? and context_id = ?" existing = list(con.execute(query, (file_id, self._current_context_id))) if existing: - linemap = merge_numbits(linemap, from_blob(existing[0][0])) + linemap = numbits_union(linemap, existing[0][0]) con.execute( - "insert or replace into line_map " + "insert or replace into line_bits " " (file_id, context_id, numbits) values (?, ?, ?)", - (file_id, self._current_context_id, to_blob(linemap)), + (file_id, self._current_context_id, linemap), ) def add_arcs(self, arc_data): @@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin): # Get line data. cur = conn.execute( - 'select file.path, context.context, line_map.numbits ' - 'from line_map ' - 'inner join file on file.id = line_map.file_id ' - 'inner join context on context.id = line_map.context_id' + 'select file.path, context.context, line_bits.numbits ' + 'from line_bits ' + 'inner join file on file.id = line_bits.file_id ' + 'inner join context on context.id = line_bits.context_id' ) lines = { - (files[path], context): from_blob(numbits) + (files[path], context): numbits for (path, context, numbits) in cur } cur.close() @@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin): # Get line data. cur = conn.execute( - 'select file.path, context.context, line_map.numbits ' - 'from line_map ' - 'inner join file on file.id = line_map.file_id ' - 'inner join context on context.id = line_map.context_id' + 'select file.path, context.context, line_bits.numbits ' + 'from line_bits ' + 'inner join file on file.id = line_bits.file_id ' + 'inner join context on context.id = line_bits.context_id' ) for path, context, numbits in cur: key = (aliases.map(path), context) - numbits = from_blob(numbits) if key in lines: - numbits = merge_numbits(lines[key], numbits) + numbits = numbits_union(lines[key], numbits) lines[key] = numbits cur.close() @@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin): '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)', arc_rows ) - conn.execute("delete from line_map") + conn.execute("delete from line_bits") conn.executemany( - "insert into line_map " + "insert into line_bits " "(file_id, context_id, numbits) values (?, ?, ?)", [ - (file_ids[file], context_ids[context], to_blob(numbits)) + (file_ids[file], context_ids[context], numbits) for (file, context), numbits in lines.items() ] ) @@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin): if file_id is None: return None else: - query = "select numbits from line_map where file_id = ?" + query = "select numbits from line_bits where file_id = ?" data = [file_id] context_ids = self._get_query_context_ids(contexts) if context_ids is not None: @@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin): bitmaps = list(con.execute(query, data)) nums = set() for row in bitmaps: - nums.update(numbits_to_nums(from_blob(row[0]))) + nums.update(numbits_to_nums(row[0])) return sorted(nums) def arcs(self, filename, contexts=None): @@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin): lineno_contexts_map[tono].append(context) else: query = ( - "select l.numbits, c.context from line_map l, context c " + "select l.numbits, c.context from line_bits l, context c " "where l.context_id = c.id " "and file_id = ?" ) @@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin): query += " and l.context_id in (" + ids_array + ")" data += context_ids for numbits, context in con.execute(query, data): - for lineno in numbits_to_nums(from_blob(numbits)): + for lineno in numbits_to_nums(numbits): lineno_contexts_map[lineno].append(context) return lineno_contexts_map diff --git a/tests/test_numbits.py b/tests/test_numbits.py index a556869b..eb094d2a 100644 --- a/tests/test_numbits.py +++ b/tests/test_numbits.py @@ -3,20 +3,23 @@ """Tests for coverage.numbits""" +import sqlite3 + from hypothesis import example, given, settings from hypothesis.strategies import sets, integers from coverage import env +from coverage.backward import byte_to_int from coverage.numbits import ( - nums_to_numbits, numbits_to_nums, merge_numbits, numbits_any_intersection, - num_in_numbits, + nums_to_numbits, numbits_to_nums, numbits_union, numbits_intersection, + numbits_any_intersection, num_in_numbits, register_sqlite_functions, ) from tests.coveragetest import CoverageTest # Hypothesis-generated line number data line_numbers = integers(min_value=1, max_value=9999) -line_number_sets = sets(line_numbers, min_size=1) +line_number_sets = sets(line_numbers) # When coverage-testing ourselves, hypothesis complains about a test being # flaky because the first run exceeds the deadline (and fails), and the second @@ -26,6 +29,12 @@ if env.METACOV: default_settings = settings(default_settings, deadline=None) +def good_numbits(numbits): + """Assert that numbits is good.""" + # It shouldn't end with a zero byte, that should have been trimmed off. + assert (not numbits) or (byte_to_int(numbits[-1]) != 0) + + class NumbitsOpTest(CoverageTest): """Tests of the numbits operations in numbits.py.""" @@ -34,19 +43,43 @@ class NumbitsOpTest(CoverageTest): @given(line_number_sets) @settings(default_settings) def test_conversion(self, nums): - nums2 = numbits_to_nums(nums_to_numbits(nums)) + numbits = nums_to_numbits(nums) + good_numbits(numbits) + nums2 = numbits_to_nums(numbits) self.assertEqual(nums, set(nums2)) @given(line_number_sets, line_number_sets) @settings(default_settings) - def test_merging(self, nums1, nums2): - merged = numbits_to_nums(merge_numbits(nums_to_numbits(nums1), nums_to_numbits(nums2))) - self.assertEqual(nums1 | nums2, set(merged)) + def test_union(self, nums1, nums2): + nb1 = nums_to_numbits(nums1) + good_numbits(nb1) + nb2 = nums_to_numbits(nums2) + good_numbits(nb2) + nbu = numbits_union(nb1, nb2) + good_numbits(nbu) + union = numbits_to_nums(nbu) + self.assertEqual(nums1 | nums2, set(union)) + + @given(line_number_sets, line_number_sets) + @settings(default_settings) + def test_intersection(self, nums1, nums2): + nb1 = nums_to_numbits(nums1) + good_numbits(nb1) + nb2 = nums_to_numbits(nums2) + good_numbits(nb2) + nbi = numbits_intersection(nb1, nb2) + good_numbits(nbi) + intersection = numbits_to_nums(nbi) + self.assertEqual(nums1 & nums2, set(intersection)) @given(line_number_sets, line_number_sets) @settings(default_settings) def test_any_intersection(self, nums1, nums2): - inter = numbits_any_intersection(nums_to_numbits(nums1), nums_to_numbits(nums2)) + nb1 = nums_to_numbits(nums1) + good_numbits(nb1) + nb2 = nums_to_numbits(nums2) + good_numbits(nb2) + inter = numbits_any_intersection(nb1, nb2) expect = bool(nums1 & nums2) self.assertEqual(expect, bool(inter)) @@ -55,5 +88,71 @@ class NumbitsOpTest(CoverageTest): @example(152, {144}) def test_num_in_numbits(self, num, nums): numbits = nums_to_numbits(nums) + good_numbits(numbits) is_in = num_in_numbits(num, numbits) self.assertEqual(num in nums, is_in) + + +class NumbitsSqliteFunctionTest(CoverageTest): + """Tests of the SQLite integration for numbits functions.""" + + run_in_temp_dir = False + + def setUp(self): + super(NumbitsSqliteFunctionTest, self).setUp() + conn = sqlite3.connect(":memory:") + register_sqlite_functions(conn) + self.cursor = conn.cursor() + self.cursor.execute("create table data (id int, numbits blob)") + self.cursor.executemany( + "insert into data (id, numbits) values (?, ?)", + [ + (i, nums_to_numbits(range(i, 100, i))) + for i in range(1, 11) + ] + ) + self.addCleanup(self.cursor.close) + + def test_numbits_union(self): + res = self.cursor.execute( + "select numbits_union(" + "(select numbits from data where id = 7)," + "(select numbits from data where id = 9)" + ")" + ) + answer = numbits_to_nums(list(res)[0][0]) + self.assertEqual( + [7, 9, 14, 18, 21, 27, 28, 35, 36, 42, 45, 49, + 54, 56, 63, 70, 72, 77, 81, 84, 90, 91, 98, 99], + answer + ) + + def test_numbits_intersection(self): + res = self.cursor.execute( + "select numbits_intersection(" + "(select numbits from data where id = 7)," + "(select numbits from data where id = 9)" + ")" + ) + answer = numbits_to_nums(list(res)[0][0]) + self.assertEqual([63], answer) + + def test_numbits_any_intersection(self): + res = self.cursor.execute( + "select numbits_any_intersection(?, ?)", + (nums_to_numbits([1, 2, 3]), nums_to_numbits([3, 4, 5])) + ) + answer = [any_inter for (any_inter,) in res] + self.assertEqual([1], answer) + + res = self.cursor.execute( + "select numbits_any_intersection(?, ?)", + (nums_to_numbits([1, 2, 3]), nums_to_numbits([7, 8, 9])) + ) + answer = [any_inter for (any_inter,) in res] + self.assertEqual([0], answer) + + def test_num_in_numbits(self): + res = self.cursor.execute("select id, num_in_numbits(12, numbits) from data order by id") + answer = [is_in for (id, is_in) in res] + self.assertEqual([1, 1, 1, 1, 0, 1, 0, 0, 0, 0], answer) |