diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-01-31 21:33:55 -0500 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-01-31 21:34:49 -0500 |
| commit | 6e2f3ae8842392c46ccc91a9ce4bba92296890cb (patch) | |
| tree | 5a672b0b9a6f1b5f0d1b264285cfb5a74574e13e /contrib/pg_trgm/trgm_gist.c | |
| parent | 6238473adb119a5a11061e40e159e8c5187fe492 (diff) | |
| download | postgresql-6e2f3ae8842392c46ccc91a9ce4bba92296890cb.tar.gz | |
Support LIKE and ILIKE index searches via contrib/pg_trgm indexes.
Unlike Btree-based LIKE optimization, this works for non-left-anchored
search patterns. The effectiveness of the search depends on how many
trigrams can be extracted from the pattern. (The worst case, with no
trigrams, degrades to a full-table scan, so this isn't a panacea. But
it can be very useful.)
Alexander Korotkov, reviewed by Jan Urbanski
Diffstat (limited to 'contrib/pg_trgm/trgm_gist.c')
| -rw-r--r-- | contrib/pg_trgm/trgm_gist.c | 95 |
1 file changed, 83 insertions, 12 deletions
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index d9f3d40c17..d83265c11c 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -195,31 +195,61 @@ gtrgm_consistent(PG_FUNCTION_ARGS) TRGM *key = (TRGM *) DatumGetPointer(entry->key); TRGM *qtrg; bool res; - char *cache = (char *) fcinfo->flinfo->fn_extra; - - /* All cases served by this function are exact */ - *recheck = false; - - if (cache == NULL || VARSIZE(cache) != VARSIZE(query) || memcmp(cache, query, VARSIZE(query)) != 0) + char *cache = (char *) fcinfo->flinfo->fn_extra, + *cacheContents = cache + MAXALIGN(sizeof(StrategyNumber)); + + /* + * Store both the strategy number and extracted trigrams in cache, because + * trigram extraction is relatively CPU-expensive. We must include + * strategy number because trigram extraction depends on strategy. + */ + if (cache == NULL || strategy != *((StrategyNumber *) cache) || + VARSIZE(cacheContents) != VARSIZE(query) || + memcmp(cacheContents, query, VARSIZE(query)) != 0) { - qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ); + switch (strategy) + { + case SimilarityStrategyNumber: + qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ); + break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + qtrg = generate_wildcard_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ); + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + qtrg = NULL; /* keep compiler quiet */ + break; + } if (cache) pfree(cache); - fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, - MAXALIGN(VARSIZE(query)) + VARSIZE(qtrg)); + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + MAXALIGN(sizeof(StrategyNumber)) + + MAXALIGN(VARSIZE(query)) + + VARSIZE(qtrg)); cache = (char *) fcinfo->flinfo->fn_extra; + cacheContents = cache + 
MAXALIGN(sizeof(StrategyNumber)); - memcpy(cache, query, VARSIZE(query)); - memcpy(cache + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg)); + *((StrategyNumber *) cache) = strategy; + memcpy(cacheContents, query, VARSIZE(query)); + memcpy(cacheContents + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg)); } - qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query))); + qtrg = (TRGM *) (cacheContents + MAXALIGN(VARSIZE(query))); switch (strategy) { case SimilarityStrategyNumber: + /* Similarity search is exact */ + *recheck = false; + if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ float4 tmpsml = cnt_sml(key, qtrg); @@ -242,6 +272,47 @@ gtrgm_consistent(PG_FUNCTION_ARGS) res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false; } break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + /* Wildcard search is inexact */ + *recheck = true; + + /* + * Check if all the extracted trigrams can be present in child + * nodes. + */ + if (GIST_LEAF(entry)) + { /* all leafs contains orig trgm */ + res = trgm_contained_by(qtrg, key); + } + else if (ISALLTRUE(key)) + { /* non-leaf contains signature */ + res = true; + } + else + { /* non-leaf contains signature */ + int32 k, + tmp = 0, + len = ARRNELEM(qtrg); + trgm *ptr = GETARR(qtrg); + BITVECP sign = GETSIGN(key); + + res = true; + for (k = 0; k < len; k++) + { + CPTRGM(((char *) &tmp), ptr + k); + if (!GETBIT(sign, HASHVAL(tmp))) + { + res = false; + break; + } + } + } + break; default: elog(ERROR, "unrecognized strategy number: %d", strategy); res = false; /* keep compiler quiet */ |
