summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/tsginidx.c 15
-rw-r--r-- src/backend/utils/adt/tsgistidx.c 26
-rw-r--r-- src/backend/utils/adt/tsrank.c 68
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 43
-rw-r--r-- src/include/tsearch/ts_type.h 15
5 files changed, 109 insertions, 58 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 974a1b7ae4..ba4a10313c 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -25,13 +25,12 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
- *nentries = 0;
+ *nentries = vector->size;
if (vector->size > 0)
{
int i;
WordEntry *we = ARRPTR(vector);
- *nentries = (uint32) vector->size;
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
for (i = 0; i < vector->size; i++)
@@ -134,11 +133,19 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
if (query->size > 0)
{
- int4 i,
+ int i,
j = 0;
QueryItem *item;
GinChkVal gcv;
+ /*
+ * check-parameter array has one entry for each value (operand) in the
+ * query. We expand that array into mapped_check, so that there's one
+ * entry in mapped_check for every node in the query, including
+ * operators, to allow quick lookups in checkcondition_gin. Only the
+ * entries corresponding to operands are actually used.
+ */
+
gcv.frst = item = GETQUERY(query);
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 4fc51378b4..985b917d0f 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -133,20 +133,27 @@ gtsvectorout(PG_FUNCTION_ARGS)
}
static int
-compareint(const void *a, const void *b)
+compareint(const void *va, const void *vb)
{
- if (*((int4 *) a) == *((int4 *) b))
+ int4 a = *((int4 *) va);
+ int4 b = *((int4 *) vb);
+
+ if (a == b)
return 0;
- return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
+ return (a > b) ? 1 : -1;
}
+/*
+ * Removes duplicates from an array of int4. 'l' is the
+ * size of the input array. Returns the new size of the array.
+ */
static int
uniqueint(int4 *a, int4 l)
{
int4 *ptr,
*res;
- if (l == 1)
+ if (l <= 1)
return l;
ptr = res = a;
@@ -570,12 +577,15 @@ typedef struct
} SPLITCOST;
static int
-comparecost(const void *a, const void *b)
+comparecost(const void *va, const void *vb)
{
- if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+ SPLITCOST *a = (SPLITCOST *) va;
+ SPLITCOST *b = (SPLITCOST *) vb;
+
+ if (a->cost == b->cost)
return 0;
else
- return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+ return (a->cost > b->cost) ? 1 : -1;
}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 535a3541bf..453b67df43 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,22 +53,24 @@ cnt_length(TSVector t)
{
WordEntry *ptr = ARRPTR(t),
*end = (WordEntry *) STRPTR(t);
- int len = 0,
- clen;
+ int len = 0;
while (ptr < end)
{
- if ((clen = POSDATALEN(t, ptr)) == 0)
+ int clen = POSDATALEN(t, ptr);
+
+ if (clen == 0)
len += 1;
else
len += clen;
+
ptr++;
}
return len;
}
-static int4
+static int
WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{
if (ptr->len == item->length)
@@ -80,6 +82,10 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item
return (ptr->len > item->length) ? 1 : -1;
}
+/*
+ * Returns a pointer to a WordEntry corresponding to 'item' from tsvector 't'. 'q'
+ * is the TSQuery containing 'item'. Returns NULL if not found.
+ */
static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{
@@ -178,15 +184,15 @@ SortAndUniqItems(TSQuery q, int *size)
}
/* A dummy WordEntryPos array to use when haspos is false */
-static WordEntryPos POSNULL[] = {
+static WordEntryPosVector POSNULL = {
1, /* Number of elements that follow */
- 0
+ { 0 }
};
static float
calc_rank_and(float *w, TSVector t, TSQuery q)
{
- uint16 **pos;
+ WordEntryPosVector **pos;
int i,
k,
l,
@@ -207,9 +213,8 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
pfree(item);
return calc_rank_or(w, t, q);
}
- pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
- memset(pos, 0, sizeof(uint16 *) * q->size);
- WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
+ pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+ WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
for (i = 0; i < size; i++)
{
@@ -218,25 +223,25 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
continue;
if (entry->haspos)
- pos[i] = (uint16 *) _POSDATAPTR(t, entry);
+ pos[i] = _POSVECPTR(t, entry);
else
- pos[i] = (uint16 *) POSNULL;
+ pos[i] = &POSNULL;
- dimt = *(uint16 *) (pos[i]);
- post = (WordEntryPos *) (pos[i] + 1);
+ dimt = pos[i]->npos;
+ post = pos[i]->pos;
for (k = 0; k < i; k++)
{
if (!pos[k])
continue;
- lenct = *(uint16 *) (pos[k]);
- ct = (WordEntryPos *) (pos[k] + 1);
+ lenct = pos[k]->npos;
+ ct = pos[k]->pos;
for (l = 0; l < dimt; l++)
{
for (p = 0; p < lenct; p++)
{
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
- if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
+ if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
{
float curw;
@@ -285,8 +290,8 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
}
else
{
- dimt = *(uint16 *) POSNULL;
- post = POSNULL + 1;
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
}
resj = 0.0;
@@ -456,17 +461,19 @@ typedef struct
{
QueryItem **item;
int16 nitem;
- bool needfree;
uint8 wclass;
int32 pos;
} DocRepresentation;
static int
-compareDocR(const void *a, const void *b)
+compareDocR(const void *va, const void *vb)
{
- if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
+ DocRepresentation *a = (DocRepresentation *) va;
+ DocRepresentation *b = (DocRepresentation *) vb;
+
+ if (a->pos == b->pos)
return 0;
- return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
+ return (a->pos > b->pos) ? 1 : -1;
}
static bool
@@ -547,11 +554,11 @@ Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
ptr = doc + lastpos;
- /* find lower bound of cover from founded upper bound, move down */
+ /* find lower bound of cover from found upper bound, move down */
while (ptr >= doc + ext->pos)
{
for (i = 0; i < ptr->nitem; i++)
- if(ptr->item[i]->type == QI_VAL) /* XXX */
+ if(ptr->item[i]->type == QI_VAL)
ptr->item[i]->operand.istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{
@@ -620,8 +627,8 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
}
else
{
- dimt = *(uint16 *) POSNULL;
- post = POSNULL + 1;
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
}
while (cur + dimt >= len)
@@ -636,7 +643,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
{
int k;
- doc[cur].needfree = false;
doc[cur].nitem = 0;
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);
@@ -658,7 +664,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
}
else
{
- doc[cur].needfree = false;
doc[cur].nitem = doc[cur - 1].nitem;
doc[cur].item = doc[cur - 1].item;
}
@@ -764,9 +769,6 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
- for (i = 0; i < doclen; i++)
- if (doc[i].needfree)
- pfree(doc[i].item);
pfree(doc);
return (float4) Wdoc;
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8e7593513f..e150f9a267 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -269,7 +269,7 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
static int4
add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos)
{
- uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr);
+ uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
int i;
uint16 slen = POSDATALEN(src, srcptr),
startlen;
@@ -354,7 +354,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
if (ptr->haspos)
{
cur += SHORTALIGN(ptr1->len);
- memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
else
@@ -399,7 +399,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
cur += SHORTALIGN(ptr1->len);
if (ptr1->haspos)
{
- memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
if (ptr2->haspos)
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
@@ -434,7 +434,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
if (ptr->haspos)
{
cur += SHORTALIGN(ptr1->len);
- memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
else
@@ -499,10 +499,17 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
* check weight info
*/
static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
+checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
{
- WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16));
- uint16 len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len)));
+ WordEntryPosVector *posvec;
+ WordEntryPos *ptr;
+ uint16 len;
+
+ posvec = (WordEntryPosVector *)
+ (chkval->values + SHORTALIGN(val->pos + val->len));
+
+ len = posvec->npos;
+ ptr = posvec->pos;
while (len--)
{
@@ -674,7 +681,13 @@ ts_match_tq(PG_FUNCTION_ARGS)
}
/*
- * Statistics of tsvector
+ * ts_stat statistic function support
+ */
+
+
+/*
+ * Returns the number of positions in value 'wptr' within tsvector 'txt'
+ * that have a weight equal to one of the weights in the 'weight' bitmask.
*/
static int
check_weight(TSVector txt, WordEntry * wptr, int8 weight)
@@ -824,6 +837,18 @@ formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len)
return newstat;
}
+/*
+ * This is written like a custom aggregate function, because the
+ * original plan was to do just that. Unfortunately, an aggregate function
+ * can't return a set, so that plan was abandoned. If that limitation is
+ * lifted in the future, ts_stat could be a real aggregate function so that
+ * you could use it like this:
+ *
+ * SELECT ts_stat(vector_column) FROM vector_table;
+ *
+ * where vector_column is a tsvector-type column in vector_table.
+ */
+
static tsstat *
ts_accum(tsstat * stat, Datum data)
{
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 0aa95e892c..107fc4a711 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.5 2007/09/11 08:46:29 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -43,6 +43,13 @@ typedef struct
typedef uint16 WordEntryPos;
+typedef struct
+{
+ uint16 npos;
+ WordEntryPos pos[1]; /* var length */
+} WordEntryPosVector;
+
+
#define WEP_GETWEIGHT(x) ( (x) >> 14 )
#define WEP_GETPOS(x) ( (x) & 0x3fff )
@@ -88,9 +95,9 @@ typedef TSVectorData *TSVector;
/* returns a pointer to the beginning of lexemes */
#define STRPTR(x) ( (char *) &(x)->entries[x->size] )
-#define _POSDATAPTR(x,e) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))
-#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
-#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
+#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
/*
* fmgr interface macros