5 files changed, 109 insertions, 58 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 974a1b7ae4..ba4a10313c 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -25,13 +25,12 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
 
-	*nentries = 0;
+	*nentries = vector->size;
 	if (vector->size > 0)
 	{
 		int			i;
 		WordEntry  *we = ARRPTR(vector);
 
-		*nentries = (uint32) vector->size;
 		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
 
 		for (i = 0; i < vector->size; i++)
@@ -134,11 +133,19 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
 
 	if (query->size > 0)
 	{
-		int4		i,
+		int			i,
 					j = 0;
 		QueryItem  *item;
 		GinChkVal	gcv;
 
+		/*
+		 * The check-parameter array has one entry for each value (operand) in
+		 * the query. We expand that array into mapped_check, so that there's
+		 * one entry in mapped_check for every node in the query, including
+		 * operators, to allow quick lookups in checkcondition_gin. Only the
+		 * entries corresponding to operands are actually used.
+		 */
+
 		gcv.frst = item = GETQUERY(query);
 		gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 4fc51378b4..985b917d0f 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -133,20 +133,27 @@ gtsvectorout(PG_FUNCTION_ARGS)
 }
 
 static int
-compareint(const void *a, const void *b)
+compareint(const void *va, const void *vb)
 {
-	if (*((int4 *) a) == *((int4 *) b))
+	int4 a = *((int4 *) va);
+	int4 b = *((int4 *) vb);
+
+	if (a == b)
 		return 0;
-	return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
+	return (a > b) ? 1 : -1;
 }
 
+/*
+ * Removes duplicates from an array of int4. 'l' is the
+ * size of the input array. Returns the new size of the array.
+ */
 static int
 uniqueint(int4 *a, int4 l)
 {
 	int4	   *ptr,
 			   *res;
 
-	if (l == 1)
+	if (l <= 1)
 		return l;
 
 	ptr = res = a;
@@ -570,12 +577,15 @@ typedef struct
 } SPLITCOST;
 
 static int
-comparecost(const void *a, const void *b)
+comparecost(const void *va, const void *vb)
 {
-	if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+	SPLITCOST *a = (SPLITCOST *) va;
+	SPLITCOST *b = (SPLITCOST *) vb;
+
+	if (a->cost == b->cost)
 		return 0;
 	else
-		return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+		return (a->cost > b->cost) ? 1 : -1;
 }
 
 
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 535a3541bf..453b67df43 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,22 +53,24 @@ cnt_length(TSVector t)
 {
 	WordEntry  *ptr = ARRPTR(t),
 			   *end = (WordEntry *) STRPTR(t);
-	int			len = 0,
-				clen;
+	int			len = 0;
 
 	while (ptr < end)
 	{
-		if ((clen = POSDATALEN(t, ptr)) == 0)
+		int clen = POSDATALEN(t, ptr);
+
+		if (clen == 0)
 			len += 1;
 		else
 			len += clen;
+
 		ptr++;
 	}
 
 	return len;
 }
 
-static int4
+static int
 WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
 {
 	if (ptr->len == item->length)
@@ -80,6 +82,10 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item
 	return (ptr->len > item->length) ? 1 : -1;
 }
 
+/*
+ * Returns a pointer to the WordEntry in tsvector 't' corresponding to 'item'.
+ * 'q' is the TSQuery containing 'item'. Returns NULL if not found.
+ */
 static WordEntry *
 find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
 {
@@ -178,15 +184,15 @@ SortAndUniqItems(TSQuery q, int *size)
 }
 
 /* A dummy WordEntryPos array to use when haspos is false */
-static WordEntryPos POSNULL[] = {
+static WordEntryPosVector POSNULL = {
 	1, /* Number of elements that follow */
-	0
+	{ 0 }
 };
 
 static float
 calc_rank_and(float *w, TSVector t, TSQuery q)
 {
-	uint16	  **pos;
+	WordEntryPosVector	 **pos;
 	int			i,
 				k,
 				l,
@@ -207,9 +213,8 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
 		pfree(item);
 		return calc_rank_or(w, t, q);
 	}
-	pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
-	memset(pos, 0, sizeof(uint16 *) * q->size);
-	WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
+	pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+	WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
 
 	for (i = 0; i < size; i++)
 	{
@@ -218,25 +223,25 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
 			continue;
 
 		if (entry->haspos)
-			pos[i] = (uint16 *) _POSDATAPTR(t, entry);
+			pos[i] = _POSVECPTR(t, entry);
 		else
-			pos[i] = (uint16 *) POSNULL;
+			pos[i] = &POSNULL;
 
 
-		dimt = *(uint16 *) (pos[i]);
-		post = (WordEntryPos *) (pos[i] + 1);
+		dimt = pos[i]->npos;
+		post = pos[i]->pos;
 		for (k = 0; k < i; k++)
 		{
 			if (!pos[k])
 				continue;
-			lenct = *(uint16 *) (pos[k]);
-			ct = (WordEntryPos *) (pos[k] + 1);
+			lenct = pos[k]->npos;
+			ct = pos[k]->pos;
 			for (l = 0; l < dimt; l++)
 			{
 				for (p = 0; p < lenct; p++)
 				{
 					dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-					if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
+					if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
 					{
 						float		curw;
 
@@ -285,8 +290,8 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
 		}
 		else
 		{
-			dimt = *(uint16 *) POSNULL;
-			post = POSNULL + 1;
+			dimt = POSNULL.npos;
+			post = POSNULL.pos;
 		}
 
 		resj = 0.0;
@@ -456,17 +461,19 @@ typedef struct
 {
 	QueryItem **item;
 	int16		nitem;
-	bool		needfree;
 	uint8		wclass;
 	int32		pos;
 } DocRepresentation;
 
 static int
-compareDocR(const void *a, const void *b)
+compareDocR(const void *va, const void *vb)
 {
-	if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
+	DocRepresentation *a = (DocRepresentation *) va;
+	DocRepresentation *b = (DocRepresentation *) vb;
+
+	if (a->pos == b->pos)
 		return 0;
-	return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
+	return (a->pos > b->pos) ? 1 : -1;
 }
 
 static bool
@@ -547,11 +554,11 @@ Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
 
 	ptr = doc + lastpos;
 
-	/* find lower bound of cover from founded upper bound, move down */
+	/* find lower bound of cover from found upper bound, move down */
 	while (ptr >= doc + ext->pos)
 	{
 		for (i = 0; i < ptr->nitem; i++)
-			if(ptr->item[i]->type  == QI_VAL) /* XXX */
+			if(ptr->item[i]->type  == QI_VAL)
 				ptr->item[i]->operand.istrue = 1;
 		if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
 		{
@@ -620,8 +627,8 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 		}
 		else
 		{
-			dimt = *(uint16 *) POSNULL;
-			post = POSNULL + 1;
+			dimt = POSNULL.npos;
+			post = POSNULL.pos;
 		}
 
 		while (cur + dimt >= len)
@@ -636,7 +643,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 			{
 				int			k;
 
-				doc[cur].needfree = false;
 				doc[cur].nitem = 0;
 				doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);
 
@@ -658,7 +664,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 			}
 			else
 			{
-				doc[cur].needfree = false;
 				doc[cur].nitem = doc[cur - 1].nitem;
 				doc[cur].item = doc[cur - 1].item;
 			}
@@ -764,9 +769,6 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
 	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
 		Wdoc /= log((double) (txt->size + 1)) / log(2.0);
 
-	for (i = 0; i < doclen; i++)
-		if (doc[i].needfree)
-			pfree(doc[i].item);
 	pfree(doc);
 
 	return (float4) Wdoc;
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8e7593513f..e150f9a267 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -269,7 +269,7 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
 static int4
 add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos)
 {
-	uint16	   *clen = (uint16 *) _POSDATAPTR(dest, destptr);
+	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
 	int			i;
 	uint16		slen = POSDATALEN(src, srcptr),
 				startlen;
@@ -354,7 +354,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
 			if (ptr->haspos)
 			{
 				cur += SHORTALIGN(ptr1->len);
-				memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+				memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 				cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 			}
 			else
@@ -399,7 +399,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
 				cur += SHORTALIGN(ptr1->len);
 				if (ptr1->haspos)
 				{
-					memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+					memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 					cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 					if (ptr2->haspos)
 						cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
@@ -434,7 +434,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
 		if (ptr->haspos)
 		{
 			cur += SHORTALIGN(ptr1->len);
-			memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+			memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 			cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 		}
 		else
@@ -499,10 +499,17 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
  * check weight info
  */
 static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
+checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
 {
-	WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16));
-	uint16		len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len)));
+	WordEntryPosVector *posvec;
+	WordEntryPos *ptr;
+	uint16		len;
+
+	posvec = (WordEntryPosVector *) 
+		(chkval->values + SHORTALIGN(val->pos + val->len));
+
+	len = posvec->npos;
+	ptr = posvec->pos;
 
 	while (len--)
 	{
@@ -674,7 +681,13 @@ ts_match_tq(PG_FUNCTION_ARGS)
 }
 
 /*
- * Statistics of tsvector
+ * ts_stat statistic function support
+ */
+
+
+/*
+ * Returns the number of positions in value 'wptr' within tsvector 'txt'
+ * that have a weight equal to one of the weights in the 'weight' bitmask.
  */
 static int
 check_weight(TSVector txt, WordEntry * wptr, int8 weight)
@@ -824,6 +837,18 @@ formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len)
 	return newstat;
 }
 
+/*
+ * This is written like a custom aggregate function, because the
+ * original plan was to do just that. Unfortunately, an aggregate function
+ * can't return a set, so that plan was abandoned. If that limitation is
+ * lifted in the future, ts_stat could be a real aggregate function so that
+ * you could use it like this:
+ *
+ *   SELECT ts_stat(vector_column) FROM vector_table;
+ *
+ * where vector_column is a tsvector-type column in vector_table.
+ */
+
 static tsstat *
 ts_accum(tsstat * stat, Datum data)
 {
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 0aa95e892c..107fc4a711 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.5 2007/09/11 08:46:29 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,6 +43,13 @@ typedef struct
 
 typedef uint16 WordEntryPos;
 
+typedef struct
+{
+	uint16 npos;	/* number of entries stored in pos[] */
+	WordEntryPos pos[1]; /* variable length: npos entries follow */
+} WordEntryPosVector;
+
+
 #define WEP_GETWEIGHT(x)	( (x) >> 14 )
 #define WEP_GETPOS(x)		( (x) & 0x3fff )
 
@@ -88,9 +95,9 @@ typedef TSVectorData *TSVector;
 /* returns a pointer to the beginning of lexemes */
 #define STRPTR(x)	( (char *) &(x)->entries[x->size] )
 
-#define _POSDATAPTR(x,e)	(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))
-#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
-#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+#define _POSVECPTR(x, e) 	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
+#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
 
 /*
  * fmgr interface macros