From 98edd617f3b62a02cb2df9b418fcc4ece45c7ec0 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 15 May 2015 17:59:46 +0300 Subject: Fix datatype confusion with the new lossy GiST distance functions. We can only support a lossy distance function when the distance function's datatype is comparable with the original ordering operator's datatype. The distance function always returns a float8, so we are limited to float8, and float4 (by a hard-coded cast of the float8 to float4). In light of this limitation, it seems like a good idea to have a separate 'recheck' flag for the ORDER BY expressions, so that if you have a non-lossy distance function, it still works with lossy quals. There are cases like that with the build-in or contrib opclasses, but it's plausible. There was a hidden assumption that the ORDER BY values returned by GiST match the original ordering operator's return type, but there are plenty of examples where that's not true, e.g. in btree_gist and pg_trgm. As long as the distance function is not lossy, we can tolerate that and just not return the distance to the executor (or rather, always return NULL). The executor doesn't need the distances if there are no lossy results. There was another little bug: the recheck variable was not initialized before calling the distance function. That revealed the bigger issue, as the executor tried to reorder tuples that didn't need reordering, and that failed because of the datatype mismatch. --- src/backend/access/gist/gistget.c | 56 ++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 12 deletions(-) (limited to 'src/backend/access/gist/gistget.c') diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 1f791a4f8e..09fba06f73 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -16,6 +16,7 @@ #include "access/gist_private.h" #include "access/relscan.h" +#include "catalog/pg_type.h" #include "miscadmin.h" #include "pgstat.h" #include "lib/pairingheap.h" @@ -31,9 +32,11 @@ * depending on whether the containing page is a leaf page or not. * * On success return for a heap tuple, *recheck_p is set to indicate whether - * recheck is needed. We recheck if any of the consistent() or distance() + * the quals need to be rechecked. We recheck if any of the consistent() * functions request it. recheck is not interesting when examining a non-leaf * entry, since we must visit the lower index page if there's any doubt. + * Similarly, *recheck_distances_p is set to indicate whether the distances + * need to be rechecked, and it is also ignored for non-leaf entries. * * If we are doing an ordered scan, so->distances[] is filled with distance * data from the distance() functions before returning success. @@ -50,7 +53,8 @@ gistindex_keytest(IndexScanDesc scan, IndexTuple tuple, Page page, OffsetNumber offset, - bool *recheck_p) + bool *recheck_p, + bool *recheck_distances_p) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; GISTSTATE *giststate = so->giststate; @@ -60,6 +64,7 @@ gistindex_keytest(IndexScanDesc scan, Relation r = scan->indexRelation; *recheck_p = false; + *recheck_distances_p = false; /* * If it's a leftover invalid tuple from pre-9.1, treat it as a match with @@ -194,11 +199,14 @@ gistindex_keytest(IndexScanDesc scan, * use.) * * Distance functions get a recheck argument as well. In this - * case the returned distance is the lower bound of distance - * and needs to be rechecked. We return single recheck flag - * which means that both quals and distances are to be - * rechecked. + * case the returned distance is the lower bound of distance and + * needs to be rechecked. We return single recheck flag which + * means that both quals and distances are to be rechecked. We + * initialize the flag to 'false'. The flag was added in version + * 9.5 and the distance operators written before that won't know + * about the flag, and are never lossy. */ + recheck = false; dist = FunctionCall5Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), @@ -206,9 +214,7 @@ gistindex_keytest(IndexScanDesc scan, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype), PointerGetDatum(&recheck)); - - *recheck_p |= recheck; - + *recheck_distances_p |= recheck; *distance_p = DatumGetFloat8(dist); } @@ -310,6 +316,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, IndexTuple it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); bool match; bool recheck; + bool recheck_distances; /* * Must call gistindex_keytest in tempCxt, and clean up any leftover @@ -317,7 +324,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, */ oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt); - match = gistindex_keytest(scan, it, page, i, &recheck); + match = gistindex_keytest(scan, it, page, i, + &recheck, &recheck_distances); MemoryContextSwitchTo(oldcxt); MemoryContextReset(so->giststate->tempCxt); @@ -375,6 +383,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, item->blkno = InvalidBlockNumber; item->data.heap.heapPtr = it->t_tid; item->data.heap.recheck = recheck; + item->data.heap.recheckDistances = recheck_distances; /* * In an index-only scan, also fetch the data from the tuple. @@ -461,10 +470,33 @@ getNextNearest(IndexScanDesc scan) /* found a heap item at currently minimal distance */ scan->xs_ctup.t_self = item->data.heap.heapPtr; scan->xs_recheck = item->data.heap.recheck; + scan->xs_recheckorderby = item->data.heap.recheckDistances; for (i = 0; i < scan->numberOfOrderBys; i++) { - scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]); - scan->xs_orderbynulls[i] = false; + if (so->orderByTypes[i] == FLOAT8OID) + { + scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]); + scan->xs_orderbynulls[i] = false; + } + else if (so->orderByTypes[i] == FLOAT4OID) + { + scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]); + scan->xs_orderbynulls[i] = false; + } + else + { + /* + * If the ordering operator's return value is anything + * else, we don't know how to convert the float8 bound + * calculated by the distance function to that. The + * executor won't actually need the order by values we + * return here, if there are no lossy results, so only + * insist on the datatype if the *recheck is set. + */ + if (scan->xs_recheckorderby) + elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy"); + scan->xs_orderbynulls[i] = true; + } } /* in an index-only scan, also return the reconstructed tuple. */ -- cgit v1.2.1