3 files changed, 262 insertions, 72 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py
index abe40f41..4b340c8e 100644
--- a/coverage/numbits.py
+++ b/coverage/numbits.py
@@ -5,29 +5,62 @@
 Functions to manipulate packed binary representations of number sets.
 
 To save space, coverage stores sets of line numbers in SQLite using a packed
-binary representation called a numbits.  A numbits is stored as a blob in the
-database.  The exact meaning of the bytes in the blobs should be considered an
-implementation detail that might change in the future.  Use these functions to
-work with those binary blobs of data.
+binary representation called a numbits.  A numbits is a set of positive
+integers.
+
+A numbits is stored as a blob in the database.  The exact meaning of the bytes
+in the blobs should be considered an implementation detail that might change in
+the future.  Use these functions to work with those binary blobs of data.
 
 """
 
+from coverage import env
 from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest
-from coverage.misc import contract
+from coverage.misc import contract, new_contract
+
+if env.PY3:
+    def _to_blob(b):
+        """Convert a bytestring into a type SQLite will accept for a blob."""
+        return b
 
+    new_contract('blob', lambda v: isinstance(v, bytes))
+else:
+    def _to_blob(b):
+        """Convert a bytestring into a type SQLite will accept for a blob."""
+        return buffer(b)                                    # pylint: disable=undefined-variable
 
-@contract(nums='Iterable', returns='bytes')
+    new_contract('blob', lambda v: isinstance(v, buffer))   # pylint: disable=undefined-variable
+
+@contract(nums='Iterable', returns='blob')
 def nums_to_numbits(nums):
-    """Convert `nums` (a non-empty iterable of ints) into a numbits."""
-    nbytes = max(nums) // 8 + 1
+    """Convert `nums` into a numbits.
+
+    Arguments:
+        nums (a reusable iterable of integers): the line numbers to store.
+
+    Returns:
+        A binary blob.
+    """
+    try:
+        nbytes = max(nums) // 8 + 1
+    except ValueError:
+        # nums was empty.
+        return _to_blob(b'')
     b = bytearray(nbytes)
     for num in nums:
         b[num//8] |= 1 << num % 8
-    return bytes(b)
+    return _to_blob(bytes(b))
 
-@contract(numbits='bytes', returns='list[int]')
+@contract(numbits='blob', returns='list[int]')
 def numbits_to_nums(numbits):
-    """Convert a numbits into a list of numbers."""
+    """Convert a numbits into a list of numbers.
+
+    Arguments:
+        numbits (a binary blob): the packed number set.
+
+    Returns:
+        A list of integers.
+    """
     nums = []
     for byte_i, byte in enumerate(bytes_to_ints(numbits)):
         for bit_i in range(8):
@@ -35,22 +68,95 @@ def numbits_to_nums(numbits):
                 nums.append(byte_i * 8 + bit_i)
     return nums
 
-@contract(numbits1='bytes', numbits2='bytes', returns='bytes')
-def merge_numbits(numbits1, numbits2):
-    """Merge two numbits"""
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_union(numbits1, numbits2):
+    """Compute the union of two numbits.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A new numbits, the union of the two number sets.
+    """
+    byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
+    return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs))
+
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_intersection(numbits1, numbits2):
+    """Compute the intersection of two numbits.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A new numbits, the intersection of the two number sets.
+    """
     byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
-    return binary_bytes(b1 | b2 for b1, b2 in byte_pairs)
+    intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs)
+    return _to_blob(intersection_bytes.rstrip(b'\0'))
 
-@contract(numbits1='bytes', numbits2='bytes', returns='bool')
+@contract(numbits1='blob', numbits2='blob', returns='bool')
 def numbits_any_intersection(numbits1, numbits2):
-    """Is there any number that appears in both numbits?"""
+    """Is there any number that appears in both numbits?
+
+    Determine whether two number sets have a non-empty intersection. This is
+    faster than computing the intersection.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A boolean, true if there is any number in both of the number sets.
+    """
     byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
     return any(b1 & b2 for b1, b2 in byte_pairs)
 
-@contract(num='int', numbits='bytes', returns='bool')
+@contract(num='int', numbits='blob', returns='bool')
 def num_in_numbits(num, numbits):
-    """Does the integer `num` appear in `numbits`?"""
+    """Does the integer `num` appear in `numbits`?
+
+    Arguments:
+        num (integer)
+
+        numbits (binary blob)
+
+    Returns:
+        A boolean, true if `num` is a member of `numbits`.
+    """
     nbyte, nbit = divmod(num, 8)
     if nbyte >= len(numbits):
         return False
     return bool(byte_to_int(numbits[nbyte]) & (1 << nbit))
+
+def register_sqlite_functions(connection):
+    """
+    Define numbits functions in a SQLite connection.
+
+    This defines these functions for use in SQLite statements:
+
+    * :func:`numbits_union`
+    * :func:`numbits_intersection`
+    * :func:`numbits_any_intersection`
+    * :func:`num_in_numbits`
+
+    `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>`
+    object.  After creating the connection, pass it to this function to
+    register the numbits functions.  Then you can use numbits functions in your
+    queries::
+
+        import sqlite3
+        from coverage.numbits import register_sqlite_functions
+
+        conn = sqlite3.connect('example.db')
+        register_sqlite_functions(conn)
+        c = conn.cursor()
+        c.execute(
+            "select lb.file_id, lb.context_id from line_bits lb"
+            "where num_in_numbits(?, lb.numbits)",
+            (interesting_line_number,)
+        )
+    """
+    connection.create_function("numbits_union", 2, numbits_union)
+    connection.create_function("numbits_intersection", 2, numbits_intersection)
+    connection.create_function("numbits_any_intersection", 2, numbits_any_intersection)
+    connection.create_function("num_in_numbits", 2, num_in_numbits)
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 3ee34f0f..5e7edd72 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -20,16 +20,18 @@ import zlib
 
 from coverage.backward import get_thread_id, iitems, to_bytes, to_string
 from coverage.debug import NoDebugging, SimpleReprMixin
-from coverage import env
 from coverage.files import PathAliases
 from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module
 from coverage.misc import contract
-from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits
+from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union
 from coverage.version import __version__
 
 os = isolate_module(os)
 
-SCHEMA_VERSION = 6
+# If you change the schema, increment the SCHEMA_VERSION, and update the
+# docs in docs/dbschema.rst also.
+
+SCHEMA_VERSION = 7
 
 # Schema versions:
 # 1: Released in 5.0a2
@@ -38,6 +40,7 @@ SCHEMA_VERSION = 6
 # 4: Changed line_map.bitmap to line_map.numbits.
 # 5: Added foreign key declarations.
 # 6: Key-value in meta.
+# 7: line_map -> line_bits
 
 SCHEMA = """
 CREATE TABLE coverage_schema (
@@ -71,8 +74,9 @@ CREATE TABLE context (
     unique (context)
 );
 
-CREATE TABLE line_map (
-    -- If recording lines, a row per context per line executed.
+CREATE TABLE line_bits (
+    -- If recording lines, a row per context per file executed.
+    -- All of the line numbers for that file/context are in one numbits.
     file_id integer,            -- foreign key to `file`.
     context_id integer,         -- foreign key to `context`.
     numbits blob,               -- see the numbits functions in coverage.numbits
@@ -100,24 +104,6 @@ CREATE TABLE tracer (
 );
 """
 
-if env.PY2:
-    def to_blob(b):
-        """Convert a bytestring into a type SQLite will accept for a blob."""
-        return buffer(b)        # pylint: disable=undefined-variable
-
-    def from_blob(blob):
-        """Convert a blob read from SQLite into a bytestring."""
-        return bytes(blob)
-else:
-    def to_blob(b):
-        """Convert a bytestring into a type SQLite will accept for a blob."""
-        return b
-
-    def from_blob(blob):
-        """Convert a blob read from SQLite into a bytestring."""
-        return blob
-
-
 class CoverageData(SimpleReprMixin):
     """Manages collected coverage data, including file storage.
 
@@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin):
             for filename, linenos in iitems(line_data):
                 linemap = nums_to_numbits(linenos)
                 file_id = self._file_id(filename, add=True)
-                query = "select numbits from line_map where file_id = ? and context_id = ?"
+                query = "select numbits from line_bits where file_id = ? and context_id = ?"
                 existing = list(con.execute(query, (file_id, self._current_context_id)))
                 if existing:
-                    linemap = merge_numbits(linemap, from_blob(existing[0][0]))
+                    linemap = numbits_union(linemap, existing[0][0])
 
                 con.execute(
-                    "insert or replace into line_map "
+                    "insert or replace into line_bits "
                     " (file_id, context_id, numbits) values (?, ?, ?)",
-                    (file_id, self._current_context_id, to_blob(linemap)),
+                    (file_id, self._current_context_id, linemap),
                 )
 
     def add_arcs(self, arc_data):
@@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin):
 
             # Get line data.
             cur = conn.execute(
-                'select file.path, context.context, line_map.numbits '
-                'from line_map '
-                'inner join file on file.id = line_map.file_id '
-                'inner join context on context.id = line_map.context_id'
+                'select file.path, context.context, line_bits.numbits '
+                'from line_bits '
+                'inner join file on file.id = line_bits.file_id '
+                'inner join context on context.id = line_bits.context_id'
                 )
             lines = {
-                (files[path], context): from_blob(numbits)
+                (files[path], context): numbits
                 for (path, context, numbits) in cur
                 }
             cur.close()
@@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin):
 
             # Get line data.
             cur = conn.execute(
-                'select file.path, context.context, line_map.numbits '
-                'from line_map '
-                'inner join file on file.id = line_map.file_id '
-                'inner join context on context.id = line_map.context_id'
+                'select file.path, context.context, line_bits.numbits '
+                'from line_bits '
+                'inner join file on file.id = line_bits.file_id '
+                'inner join context on context.id = line_bits.context_id'
                 )
             for path, context, numbits in cur:
                 key = (aliases.map(path), context)
-                numbits = from_blob(numbits)
                 if key in lines:
-                    numbits = merge_numbits(lines[key], numbits)
+                    numbits = numbits_union(lines[key], numbits)
                 lines[key] = numbits
             cur.close()
 
@@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin):
                 '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
                 arc_rows
             )
-            conn.execute("delete from line_map")
+            conn.execute("delete from line_bits")
             conn.executemany(
-                "insert into line_map "
+                "insert into line_bits "
                 "(file_id, context_id, numbits) values (?, ?, ?)",
                 [
-                    (file_ids[file], context_ids[context], to_blob(numbits))
+                    (file_ids[file], context_ids[context], numbits)
                     for (file, context), numbits in lines.items()
                 ]
             )
@@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin):
             if file_id is None:
                 return None
             else:
-                query = "select numbits from line_map where file_id = ?"
+                query = "select numbits from line_bits where file_id = ?"
                 data = [file_id]
                 context_ids = self._get_query_context_ids(contexts)
                 if context_ids is not None:
@@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin):
                 bitmaps = list(con.execute(query, data))
                 nums = set()
                 for row in bitmaps:
-                    nums.update(numbits_to_nums(from_blob(row[0])))
+                    nums.update(numbits_to_nums(row[0]))
                 return sorted(nums)
 
     def arcs(self, filename, contexts=None):
@@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin):
                         lineno_contexts_map[tono].append(context)
             else:
                 query = (
-                    "select l.numbits, c.context from line_map l, context c "
+                    "select l.numbits, c.context from line_bits l, context c "
                     "where l.context_id = c.id "
                     "and file_id = ?"
                     )
@@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin):
                     query += " and l.context_id in (" + ids_array + ")"
                     data += context_ids
                 for numbits, context in con.execute(query, data):
-                    for lineno in numbits_to_nums(from_blob(numbits)):
+                    for lineno in numbits_to_nums(numbits):
                         lineno_contexts_map[lineno].append(context)
         return lineno_contexts_map
 
diff --git a/tests/test_numbits.py b/tests/test_numbits.py
index a556869b..eb094d2a 100644
--- a/tests/test_numbits.py
+++ b/tests/test_numbits.py
@@ -3,20 +3,23 @@
 
 """Tests for coverage.numbits"""
 
+import sqlite3
+
 from hypothesis import example, given, settings
 from hypothesis.strategies import sets, integers
 
 from coverage import env
+from coverage.backward import byte_to_int
 from coverage.numbits import (
-    nums_to_numbits, numbits_to_nums, merge_numbits, numbits_any_intersection,
-    num_in_numbits,
+    nums_to_numbits, numbits_to_nums, numbits_union, numbits_intersection,
+    numbits_any_intersection, num_in_numbits, register_sqlite_functions,
     )
 
 from tests.coveragetest import CoverageTest
 
 # Hypothesis-generated line number data
 line_numbers = integers(min_value=1, max_value=9999)
-line_number_sets = sets(line_numbers, min_size=1)
+line_number_sets = sets(line_numbers)
 
 # When coverage-testing ourselves, hypothesis complains about a test being
 # flaky because the first run exceeds the deadline (and fails), and the second
@@ -26,6 +29,12 @@ if env.METACOV:
     default_settings = settings(default_settings, deadline=None)
 
 
+def good_numbits(numbits):
+    """Assert that numbits is good."""
+    # It shouldn't end with a zero byte, that should have been trimmed off.
+    assert (not numbits) or (byte_to_int(numbits[-1]) != 0)
+
+
 class NumbitsOpTest(CoverageTest):
     """Tests of the numbits operations in numbits.py."""
 
@@ -34,19 +43,43 @@ class NumbitsOpTest(CoverageTest):
     @given(line_number_sets)
     @settings(default_settings)
     def test_conversion(self, nums):
-        nums2 = numbits_to_nums(nums_to_numbits(nums))
+        numbits = nums_to_numbits(nums)
+        good_numbits(numbits)
+        nums2 = numbits_to_nums(numbits)
         self.assertEqual(nums, set(nums2))
 
     @given(line_number_sets, line_number_sets)
     @settings(default_settings)
-    def test_merging(self, nums1, nums2):
-        merged = numbits_to_nums(merge_numbits(nums_to_numbits(nums1), nums_to_numbits(nums2)))
-        self.assertEqual(nums1 | nums2, set(merged))
+    def test_union(self, nums1, nums2):
+        nb1 = nums_to_numbits(nums1)
+        good_numbits(nb1)
+        nb2 = nums_to_numbits(nums2)
+        good_numbits(nb2)
+        nbu = numbits_union(nb1, nb2)
+        good_numbits(nbu)
+        union = numbits_to_nums(nbu)
+        self.assertEqual(nums1 | nums2, set(union))
+
+    @given(line_number_sets, line_number_sets)
+    @settings(default_settings)
+    def test_intersection(self, nums1, nums2):
+        nb1 = nums_to_numbits(nums1)
+        good_numbits(nb1)
+        nb2 = nums_to_numbits(nums2)
+        good_numbits(nb2)
+        nbi = numbits_intersection(nb1, nb2)
+        good_numbits(nbi)
+        intersection = numbits_to_nums(nbi)
+        self.assertEqual(nums1 & nums2, set(intersection))
 
     @given(line_number_sets, line_number_sets)
     @settings(default_settings)
     def test_any_intersection(self, nums1, nums2):
-        inter = numbits_any_intersection(nums_to_numbits(nums1), nums_to_numbits(nums2))
+        nb1 = nums_to_numbits(nums1)
+        good_numbits(nb1)
+        nb2 = nums_to_numbits(nums2)
+        good_numbits(nb2)
+        inter = numbits_any_intersection(nb1, nb2)
         expect = bool(nums1 & nums2)
         self.assertEqual(expect, bool(inter))
 
@@ -55,5 +88,71 @@ class NumbitsOpTest(CoverageTest):
     @example(152, {144})
     def test_num_in_numbits(self, num, nums):
         numbits = nums_to_numbits(nums)
+        good_numbits(numbits)
         is_in = num_in_numbits(num, numbits)
         self.assertEqual(num in nums, is_in)
+
+
+class NumbitsSqliteFunctionTest(CoverageTest):
+    """Tests of the SQLite integration for numbits functions."""
+
+    run_in_temp_dir = False
+
+    def setUp(self):
+        super(NumbitsSqliteFunctionTest, self).setUp()
+        conn = sqlite3.connect(":memory:")
+        register_sqlite_functions(conn)
+        self.cursor = conn.cursor()
+        self.cursor.execute("create table data (id int, numbits blob)")
+        self.cursor.executemany(
+            "insert into data (id, numbits) values (?, ?)",
+            [
+                (i, nums_to_numbits(range(i, 100, i)))
+                for i in range(1, 11)
+            ]
+        )
+        self.addCleanup(self.cursor.close)
+
+    def test_numbits_union(self):
+        res = self.cursor.execute(
+            "select numbits_union("
+                "(select numbits from data where id = 7),"
+                "(select numbits from data where id = 9)"
+                ")"
+        )
+        answer = numbits_to_nums(list(res)[0][0])
+        self.assertEqual(
+            [7, 9, 14, 18, 21, 27, 28, 35, 36, 42, 45, 49,
+                54, 56, 63, 70, 72, 77, 81, 84, 90, 91, 98, 99],
+            answer
+        )
+
+    def test_numbits_intersection(self):
+        res = self.cursor.execute(
+            "select numbits_intersection("
+                "(select numbits from data where id = 7),"
+                "(select numbits from data where id = 9)"
+                ")"
+        )
+        answer = numbits_to_nums(list(res)[0][0])
+        self.assertEqual([63], answer)
+
+    def test_numbits_any_intersection(self):
+        res = self.cursor.execute(
+            "select numbits_any_intersection(?, ?)",
+            (nums_to_numbits([1, 2, 3]), nums_to_numbits([3, 4, 5]))
+        )
+        answer = [any_inter for (any_inter,) in res]
+        self.assertEqual([1], answer)
+
+        res = self.cursor.execute(
+            "select numbits_any_intersection(?, ?)",
+            (nums_to_numbits([1, 2, 3]), nums_to_numbits([7, 8, 9]))
+        )
+        answer = [any_inter for (any_inter,) in res]
+        self.assertEqual([0], answer)
+
+    def test_num_in_numbits(self):
+        res = self.cursor.execute("select id, num_in_numbits(12, numbits) from data order by id")
+        answer = [is_in for (id, is_in) in res]
+        self.assertEqual([1, 1, 1, 1, 0, 1, 0, 0, 0, 0], answer)