summaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtsort.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtsort.c')
-rw-r--r--src/backend/access/nbtree/nbtsort.c1926
1 files changed, 1003 insertions, 923 deletions
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 8e054d24ab..09cb43769f 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -5,30 +5,30 @@
*
*
* IDENTIFICATION
- * $Id: nbtsort.c,v 1.19 1997/08/19 21:29:46 momjian Exp $
+ * $Id: nbtsort.c,v 1.20 1997/09/07 04:39:02 momjian Exp $
*
* NOTES
*
* what we do is:
* - generate a set of initial one-block runs, distributed round-robin
- * between the output tapes.
+ * between the output tapes.
* - for each pass,
- * - swap input and output tape sets, rewinding both and truncating
- * the output tapes.
- * - merge the current run in each input tape to the current output
- * tape.
- * - when each input run has been exhausted, switch to another output
- * tape and start processing another run.
+ * - swap input and output tape sets, rewinding both and truncating
+ * the output tapes.
+ * - merge the current run in each input tape to the current output
+ * tape.
+ * - when each input run has been exhausted, switch to another output
+ * tape and start processing another run.
* - when we have fewer runs than tapes, we know we are ready to start
- * merging into the btree leaf pages. (i.e., we do not have to wait
- * until we have exactly one tape.)
+ * merging into the btree leaf pages. (i.e., we do not have to wait
+ * until we have exactly one tape.)
* - as we extract tuples from the final runs, we build the pages for
- * each level. when we have only one page on a level, it must be the
- * root -- it can be attached to the btree metapage and we are done.
+ * each level. when we have only one page on a level, it must be the
+ * root -- it can be attached to the btree metapage and we are done.
*
* conventions:
* - external interface routines take in and return "void *" for their
- * opaque handles. this is for modularity reasons.
+ * opaque handles. this is for modularity reasons.
*
* this code is moderately slow (~10% slower) compared to the regular
* btree (insertion) build code on sorted or well-clustered data. on
@@ -58,20 +58,21 @@
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
#ifdef BTREE_BUILD_STATS
#include <tcop/tcopprot.h>
-extern int ShowExecutorStats;
+extern int ShowExecutorStats;
+
#endif
-static BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags);
-static BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend);
-static void *_bt_pagestate(Relation index, int flags, int level, bool doupper);
-static void _bt_uppershutdown(Relation index, BTPageState *state);
+static BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags);
+static BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend);
+static void *_bt_pagestate(Relation index, int flags, int level, bool doupper);
+static void _bt_uppershutdown(Relation index, BTPageState * state);
/*
* turn on debugging output.
@@ -83,18 +84,18 @@ static void _bt_uppershutdown(Relation index, BTPageState *state);
#define FASTBUILD_SPOOL
#define FASTBUILD_MERGE
-#define MAXTAPES (7)
-#define TAPEBLCKSZ (MAXBLCKSZ << 2)
-#define TAPETEMP "pg_btsortXXXXXX"
+#define MAXTAPES (7)
+#define TAPEBLCKSZ (MAXBLCKSZ << 2)
+#define TAPETEMP "pg_btsortXXXXXX"
-extern int NDirectFileRead;
-extern int NDirectFileWrite;
-extern char *mktemp(char *template);
+extern int NDirectFileRead;
+extern int NDirectFileWrite;
+extern char *mktemp(char *template);
/*
- * this is what we use to shovel BTItems in and out of memory. it's
+ * this is what we use to shovel BTItems in and out of memory. it's
* bigger than a standard block because we are doing a lot of strictly
- * sequential i/o. this is obviously something of a tradeoff since we
+ * sequential i/o. this is obviously something of a tradeoff since we
* are potentially reading a bunch of zeroes off of disk in many
* cases.
*
@@ -104,14 +105,15 @@ extern char *mktemp(char *template);
* the only thing like that so i'm not going to worry about wasting a
* few bytes.
*/
-typedef struct {
- int bttb_magic; /* magic number */
- int bttb_fd; /* file descriptor */
- int bttb_top; /* top of free space within bttb_data */
- short bttb_ntup; /* number of tuples in this block */
- short bttb_eor; /* End-Of-Run marker */
- char bttb_data[TAPEBLCKSZ - 2 * sizeof(double)];
-} BTTapeBlock;
+typedef struct
+{
+ int bttb_magic; /* magic number */
+ int bttb_fd; /* file descriptor */
+ int bttb_top; /* top of free space within bttb_data */
+ short bttb_ntup; /* number of tuples in this block */
+ short bttb_eor; /* End-Of-Run marker */
+ char bttb_data[TAPEBLCKSZ - 2 * sizeof(double)];
+} BTTapeBlock;
/*
* this structure holds the bookkeeping for a simple balanced multiway
@@ -120,13 +122,14 @@ typedef struct {
* right now. though if psort was in a condition that i could hack it
* to do this, you bet i would.)
*/
-typedef struct {
- int bts_ntapes;
- int bts_tape;
- BTTapeBlock **bts_itape; /* input tape blocks */
- BTTapeBlock **bts_otape; /* output tape blocks */
- bool isunique;
-} BTSpool;
+typedef struct
+{
+ int bts_ntapes;
+ int bts_tape;
+ BTTapeBlock **bts_itape; /* input tape blocks */
+ BTTapeBlock **bts_otape; /* output tape blocks */
+ bool isunique;
+} BTSpool;
/*-------------------------------------------------------------------------
* sorting comparison routine - returns {-1,0,1} depending on whether
@@ -146,101 +149,102 @@ typedef struct {
* what the heck.
* *-------------------------------------------------------------------------
*/
-typedef struct {
- Datum *btsk_datum;
- char *btsk_nulls;
- BTItem btsk_item;
-} BTSortKey;
+typedef struct
+{
+ Datum *btsk_datum;
+ char *btsk_nulls;
+ BTItem btsk_item;
+} BTSortKey;
static Relation _bt_sortrel;
-static int _bt_nattr;
-static BTSpool * _bt_inspool;
+static int _bt_nattr;
+static BTSpool *_bt_inspool;
static void
-_bt_isortcmpinit(Relation index, BTSpool *spool)
+_bt_isortcmpinit(Relation index, BTSpool * spool)
{
- _bt_sortrel = index;
- _bt_inspool = spool;
- _bt_nattr = index->rd_att->natts;
+ _bt_sortrel = index;
+ _bt_inspool = spool;
+ _bt_nattr = index->rd_att->natts;
}
static int
-_bt_isortcmp(BTSortKey *k1, BTSortKey *k2)
+_bt_isortcmp(BTSortKey * k1, BTSortKey * k2)
{
- Datum *k1_datum = k1->btsk_datum;
- Datum *k2_datum = k2->btsk_datum;
- char *k1_nulls = k1->btsk_nulls;
- char *k2_nulls = k2->btsk_nulls;
- bool equal_isnull = false;
- int i;
-
- if (k1->btsk_item == (BTItem) NULL)
- {
- if (k2->btsk_item == (BTItem) NULL)
- return(0); /* 1 = 2 */
- return(1); /* 1 > 2 */
- }
- else if (k2->btsk_item == (BTItem) NULL)
- return(-1); /* 1 < 2 */
-
- for (i = 0; i < _bt_nattr; i++)
- {
- if ( k1_nulls[i] != ' ' ) /* k1 attr is NULL */
+ Datum *k1_datum = k1->btsk_datum;
+ Datum *k2_datum = k2->btsk_datum;
+ char *k1_nulls = k1->btsk_nulls;
+ char *k2_nulls = k2->btsk_nulls;
+ bool equal_isnull = false;
+ int i;
+
+ if (k1->btsk_item == (BTItem) NULL)
{
- if ( k2_nulls[i] != ' ' ) /* the same for k2 */
- {
- equal_isnull = true;
- continue;
- }
- return (1); /* NULL ">" NOT_NULL */
+ if (k2->btsk_item == (BTItem) NULL)
+ return (0); /* 1 = 2 */
+ return (1); /* 1 > 2 */
}
- else if ( k2_nulls[i] != ' ' ) /* k2 attr is NULL */
- return (-1); /* NOT_NULL "<" NULL */
-
- if (_bt_invokestrat(_bt_sortrel, i+1, BTGreaterStrategyNumber,
- k1_datum[i], k2_datum[i]))
- return(1); /* 1 > 2 */
- else if (_bt_invokestrat(_bt_sortrel, i+1, BTGreaterStrategyNumber,
- k2_datum[i], k1_datum[i]))
- return(-1); /* 1 < 2 */
- }
-
- if ( _bt_inspool->isunique && !equal_isnull )
- {
- _bt_spooldestroy ((void*)_bt_inspool);
- elog (WARN, "Cannot create unique index. Table contains non-unique values");
- }
- return(0); /* 1 = 2 */
+ else if (k2->btsk_item == (BTItem) NULL)
+ return (-1); /* 1 < 2 */
+
+ for (i = 0; i < _bt_nattr; i++)
+ {
+ if (k1_nulls[i] != ' ') /* k1 attr is NULL */
+ {
+ if (k2_nulls[i] != ' ') /* the same for k2 */
+ {
+ equal_isnull = true;
+ continue;
+ }
+ return (1); /* NULL ">" NOT_NULL */
+ }
+ else if (k2_nulls[i] != ' ') /* k2 attr is NULL */
+ return (-1); /* NOT_NULL "<" NULL */
+
+ if (_bt_invokestrat(_bt_sortrel, i + 1, BTGreaterStrategyNumber,
+ k1_datum[i], k2_datum[i]))
+ return (1); /* 1 > 2 */
+ else if (_bt_invokestrat(_bt_sortrel, i + 1, BTGreaterStrategyNumber,
+ k2_datum[i], k1_datum[i]))
+ return (-1); /* 1 < 2 */
+ }
+
+ if (_bt_inspool->isunique && !equal_isnull)
+ {
+ _bt_spooldestroy((void *) _bt_inspool);
+ elog(WARN, "Cannot create unique index. Table contains non-unique values");
+ }
+ return (0); /* 1 = 2 */
}
static void
-_bt_setsortkey(Relation index, BTItem bti, BTSortKey *sk)
+_bt_setsortkey(Relation index, BTItem bti, BTSortKey * sk)
{
- sk->btsk_item = (BTItem) NULL;
- sk->btsk_datum = (Datum*) NULL;
- sk->btsk_nulls = (char*) NULL;
-
- if (bti != (BTItem) NULL)
- {
- IndexTuple it = &(bti->bti_itup);
- TupleDesc itdesc = index->rd_att;
- Datum *dp = (Datum*) palloc (_bt_nattr * sizeof (Datum));
- char *np = (char*) palloc (_bt_nattr * sizeof (char));
- bool isnull;
- int i;
-
- for (i = 0; i < _bt_nattr; i++)
- {
- dp[i] = index_getattr(it, i+1, itdesc, &isnull);
- if ( isnull )
- np[i] = 'n';
- else
- np[i] = ' ';
+ sk->btsk_item = (BTItem) NULL;
+ sk->btsk_datum = (Datum *) NULL;
+ sk->btsk_nulls = (char *) NULL;
+
+ if (bti != (BTItem) NULL)
+ {
+ IndexTuple it = &(bti->bti_itup);
+ TupleDesc itdesc = index->rd_att;
+ Datum *dp = (Datum *) palloc(_bt_nattr * sizeof(Datum));
+ char *np = (char *) palloc(_bt_nattr * sizeof(char));
+ bool isnull;
+ int i;
+
+ for (i = 0; i < _bt_nattr; i++)
+ {
+ dp[i] = index_getattr(it, i + 1, itdesc, &isnull);
+ if (isnull)
+ np[i] = 'n';
+ else
+ np[i] = ' ';
+ }
+ sk->btsk_item = bti;
+ sk->btsk_datum = dp;
+ sk->btsk_nulls = np;
}
- sk->btsk_item = bti;
- sk->btsk_datum = dp;
- sk->btsk_nulls = np;
- }
}
/*-------------------------------------------------------------------------
@@ -254,84 +258,100 @@ _bt_setsortkey(Relation index, BTItem bti, BTSortKey *sk)
* XXX these probably ought to be generic library functions.
*-------------------------------------------------------------------------
*/
-typedef struct {
- int btpqe_tape; /* tape identifier */
- BTSortKey btpqe_item; /* pointer to BTItem in tape buffer */
-} BTPriQueueElem;
-
-#define MAXELEM MAXTAPES
-typedef struct {
- int btpq_nelem;
- BTPriQueueElem btpq_queue[MAXELEM];
- Relation btpq_rel;
-} BTPriQueue;
+typedef struct
+{
+ int btpqe_tape; /* tape identifier */
+ BTSortKey btpqe_item; /* pointer to BTItem in tape buffer */
+} BTPriQueueElem;
+
+#define MAXELEM MAXTAPES
+typedef struct
+{
+ int btpq_nelem;
+ BTPriQueueElem btpq_queue[MAXELEM];
+ Relation btpq_rel;
+} BTPriQueue;
/* be sure to call _bt_isortcmpinit first */
#define GREATER(a, b) \
- (_bt_isortcmp(&((a)->btpqe_item), &((b)->btpqe_item)) > 0)
+ (_bt_isortcmp(&((a)->btpqe_item), &((b)->btpqe_item)) > 0)
static void
-_bt_pqsift(BTPriQueue *q, int parent)
+_bt_pqsift(BTPriQueue * q, int parent)
{
- int child;
- BTPriQueueElem e;
-
- for (child = parent * 2 + 1;
- child < q->btpq_nelem;
- child = parent * 2 + 1) {
- if (child < q->btpq_nelem - 1) {
- if (GREATER(&(q->btpq_queue[child]), &(q->btpq_queue[child+1]))) {
- ++child;
- }
- }
- if (GREATER(&(q->btpq_queue[parent]), &(q->btpq_queue[child]))) {
- e = q->btpq_queue[child]; /* struct = */
- q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */
- q->btpq_queue[parent] = e; /* struct = */
- parent = child;
- } else {
- parent = child + 1;
+ int child;
+ BTPriQueueElem e;
+
+ for (child = parent * 2 + 1;
+ child < q->btpq_nelem;
+ child = parent * 2 + 1)
+ {
+ if (child < q->btpq_nelem - 1)
+ {
+ if (GREATER(&(q->btpq_queue[child]), &(q->btpq_queue[child + 1])))
+ {
+ ++child;
+ }
+ }
+ if (GREATER(&(q->btpq_queue[parent]), &(q->btpq_queue[child])))
+ {
+ e = q->btpq_queue[child]; /* struct = */
+ q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */
+ q->btpq_queue[parent] = e; /* struct = */
+ parent = child;
+ }
+ else
+ {
+ parent = child + 1;
+ }
}
- }
}
static int
-_bt_pqnext(BTPriQueue *q, BTPriQueueElem *e)
+_bt_pqnext(BTPriQueue * q, BTPriQueueElem * e)
{
- if (q->btpq_nelem < 1) { /* already empty */
- return(-1);
- }
- *e = q->btpq_queue[0]; /* struct = */
-
- if (--q->btpq_nelem < 1) { /* now empty, don't sift */
- return(0);
- }
- q->btpq_queue[0] = q->btpq_queue[q->btpq_nelem]; /* struct = */
- _bt_pqsift(q, 0);
- return(0);
+ if (q->btpq_nelem < 1)
+ { /* already empty */
+ return (-1);
+ }
+ *e = q->btpq_queue[0]; /* struct = */
+
+ if (--q->btpq_nelem < 1)
+ { /* now empty, don't sift */
+ return (0);
+ }
+ q->btpq_queue[0] = q->btpq_queue[q->btpq_nelem]; /* struct = */
+ _bt_pqsift(q, 0);
+ return (0);
}
static void
-_bt_pqadd(BTPriQueue *q, BTPriQueueElem *e)
+_bt_pqadd(BTPriQueue * q, BTPriQueueElem * e)
{
- int child, parent;
-
- if (q->btpq_nelem >= MAXELEM) {
- elog(WARN, "_bt_pqadd: queue overflow");
- }
-
- child = q->btpq_nelem++;
- while (child > 0) {
- parent = child / 2;
- if (GREATER(e, &(q->btpq_queue[parent]))) {
- break;
- } else {
- q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */
- child = parent;
+ int child,
+ parent;
+
+ if (q->btpq_nelem >= MAXELEM)
+ {
+ elog(WARN, "_bt_pqadd: queue overflow");
+ }
+
+ child = q->btpq_nelem++;
+ while (child > 0)
+ {
+ parent = child / 2;
+ if (GREATER(e, &(q->btpq_queue[parent])))
+ {
+ break;
+ }
+ else
+ {
+ q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */
+ child = parent;
+ }
}
- }
- q->btpq_queue[child] = *e; /* struct = */
+ q->btpq_queue[child] = *e; /* struct = */
}
/*-------------------------------------------------------------------------
@@ -339,37 +359,37 @@ _bt_pqadd(BTPriQueue *q, BTPriQueueElem *e)
*-------------------------------------------------------------------------
*/
-#define BTITEMSZ(btitem) \
- ((btitem) ? \
- (IndexTupleDSize((btitem)->bti_itup) + \
- (sizeof(BTItemData) - sizeof(IndexTupleData))) : \
- 0)
-#define SPCLEFT(tape) \
- (sizeof((tape)->bttb_data) - (tape)->bttb_top)
-#define EMPTYTAPE(tape) \
- ((tape)->bttb_ntup <= 0)
-#define BTTAPEMAGIC 0x19660226
+#define BTITEMSZ(btitem) \
+ ((btitem) ? \
+ (IndexTupleDSize((btitem)->bti_itup) + \
+ (sizeof(BTItemData) - sizeof(IndexTupleData))) : \
+ 0)
+#define SPCLEFT(tape) \
+ (sizeof((tape)->bttb_data) - (tape)->bttb_top)
+#define EMPTYTAPE(tape) \
+ ((tape)->bttb_ntup <= 0)
+#define BTTAPEMAGIC 0x19660226
/*
* reset the tape header for its next use without doing anything to
- * the physical tape file. (setting bttb_top to 0 makes the block
+ * the physical tape file. (setting bttb_top to 0 makes the block
* empty.)
*/
static void
-_bt_tapereset(BTTapeBlock *tape)
+_bt_tapereset(BTTapeBlock * tape)
{
- tape->bttb_eor = 0;
- tape->bttb_top = 0;
- tape->bttb_ntup = 0;
+ tape->bttb_eor = 0;
+ tape->bttb_top = 0;
+ tape->bttb_ntup = 0;
}
/*
* rewind the physical tape file.
*/
static void
-_bt_taperewind(BTTapeBlock *tape)
+_bt_taperewind(BTTapeBlock * tape)
{
- FileSeek(tape->bttb_fd, 0, SEEK_SET);
+ FileSeek(tape->bttb_fd, 0, SEEK_SET);
}
/*
@@ -382,17 +402,17 @@ _bt_taperewind(BTTapeBlock *tape)
* least you don't have to delete and reinsert the directory entries.
*/
static void
-_bt_tapeclear(BTTapeBlock *tape)
+_bt_tapeclear(BTTapeBlock * tape)
{
- /* blow away the contents of the old file */
- _bt_taperewind(tape);
+ /* blow away the contents of the old file */
+ _bt_taperewind(tape);
#if 0
- FileSync(tape->bttb_fd);
+ FileSync(tape->bttb_fd);
#endif
- FileTruncate(tape->bttb_fd, 0);
+ FileTruncate(tape->bttb_fd, 0);
- /* reset the buffer */
- _bt_tapereset(tape);
+ /* reset the buffer */
+ _bt_tapereset(tape);
}
/*
@@ -402,43 +422,44 @@ _bt_tapeclear(BTTapeBlock *tape)
static BTTapeBlock *
_bt_tapecreate(char *fname)
{
- BTTapeBlock *tape = (BTTapeBlock *) palloc(sizeof(BTTapeBlock));
+ BTTapeBlock *tape = (BTTapeBlock *) palloc(sizeof(BTTapeBlock));
- if (tape == (BTTapeBlock *) NULL) {
- elog(WARN, "_bt_tapecreate: out of memory");
- }
+ if (tape == (BTTapeBlock *) NULL)
+ {
+ elog(WARN, "_bt_tapecreate: out of memory");
+ }
- tape->bttb_magic = BTTAPEMAGIC;
+ tape->bttb_magic = BTTAPEMAGIC;
- tape->bttb_fd = FileNameOpenFile(fname, O_RDWR|O_CREAT|O_TRUNC, 0600);
- Assert(tape->bttb_fd >= 0);
+ tape->bttb_fd = FileNameOpenFile(fname, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ Assert(tape->bttb_fd >= 0);
- /* initialize the buffer */
- _bt_tapereset(tape);
+ /* initialize the buffer */
+ _bt_tapereset(tape);
- return(tape);
+ return (tape);
}
/*
* destroy the BTTapeBlock structure and its physical tape file.
*/
static void
-_bt_tapedestroy(BTTapeBlock *tape)
+_bt_tapedestroy(BTTapeBlock * tape)
{
- FileUnlink(tape->bttb_fd);
- pfree((void *) tape);
+ FileUnlink(tape->bttb_fd);
+ pfree((void *) tape);
}
/*
* flush the tape block to the file, marking End-Of-Run if requested.
*/
static void
-_bt_tapewrite(BTTapeBlock *tape, int eor)
+_bt_tapewrite(BTTapeBlock * tape, int eor)
{
- tape->bttb_eor = eor;
- FileWrite(tape->bttb_fd, (char *) tape, TAPEBLCKSZ);
- NDirectFileWrite += TAPEBLCKSZ/MAXBLCKSZ;
- _bt_tapereset(tape);
+ tape->bttb_eor = eor;
+ FileWrite(tape->bttb_fd, (char *) tape, TAPEBLCKSZ);
+ NDirectFileWrite += TAPEBLCKSZ / MAXBLCKSZ;
+ _bt_tapereset(tape);
}
/*
@@ -447,34 +468,36 @@ _bt_tapewrite(BTTapeBlock *tape, int eor)
*
* returns:
* - 0 if there are no more blocks in the tape or in this run (call
- * _bt_tapereset to clear the End-Of-Run marker)
+ * _bt_tapereset to clear the End-Of-Run marker)
* - 1 if a valid block was read
*/
static int
-_bt_taperead(BTTapeBlock *tape)
+_bt_taperead(BTTapeBlock * tape)
{
- int fd;
- int nread;
-
- if (tape->bttb_eor) {
- return(0); /* we are already at End-Of-Run */
- }
-
- /*
- * we're clobbering the old tape block, but we do need to save the
- * VFD (the one in the block we're reading is bogus).
- */
- fd = tape->bttb_fd;
- nread = FileRead(fd, (char *) tape, TAPEBLCKSZ);
- tape->bttb_fd = fd;
-
- if (nread != TAPEBLCKSZ) {
- Assert(nread == 0); /* we are at EOF */
- return(0);
- }
- Assert(tape->bttb_magic == BTTAPEMAGIC);
- NDirectFileRead += TAPEBLCKSZ/MAXBLCKSZ;
- return(1);
+ int fd;
+ int nread;
+
+ if (tape->bttb_eor)
+ {
+ return (0); /* we are already at End-Of-Run */
+ }
+
+ /*
+ * we're clobbering the old tape block, but we do need to save the VFD
+ * (the one in the block we're reading is bogus).
+ */
+ fd = tape->bttb_fd;
+ nread = FileRead(fd, (char *) tape, TAPEBLCKSZ);
+ tape->bttb_fd = fd;
+
+ if (nread != TAPEBLCKSZ)
+ {
+ Assert(nread == 0); /* we are at EOF */
+ return (0);
+ }
+ Assert(tape->bttb_magic == BTTAPEMAGIC);
+ NDirectFileRead += TAPEBLCKSZ / MAXBLCKSZ;
+ return (1);
}
/*
@@ -487,19 +510,20 @@ _bt_taperead(BTTapeBlock *tape)
* side effects:
* - sets 'pos' to the current position within the block.
*/
-static BTItem
-_bt_tapenext(BTTapeBlock *tape, char **pos)
+static BTItem
+_bt_tapenext(BTTapeBlock * tape, char **pos)
{
- Size itemsz;
- BTItem bti;
-
- if (*pos >= tape->bttb_data + tape->bttb_top) {
- return((BTItem) NULL);
- }
- bti = (BTItem) *pos;
- itemsz = BTITEMSZ(bti);
- *pos += DOUBLEALIGN(itemsz);
- return(bti);
+ Size itemsz;
+ BTItem bti;
+
+ if (*pos >= tape->bttb_data + tape->bttb_top)
+ {
+ return ((BTItem) NULL);
+ }
+ bti = (BTItem) * pos;
+ itemsz = BTITEMSZ(bti);
+ *pos += DOUBLEALIGN(itemsz);
+ return (bti);
}
/*
@@ -514,11 +538,11 @@ _bt_tapenext(BTTapeBlock *tape, char **pos)
* the beginning of free space.
*/
static void
-_bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz)
+_bt_tapeadd(BTTapeBlock * tape, BTItem item, int itemsz)
{
- memcpy(tape->bttb_data + tape->bttb_top, item, itemsz);
- ++tape->bttb_ntup;
- tape->bttb_top += DOUBLEALIGN(itemsz);
+ memcpy(tape->bttb_data + tape->bttb_top, item, itemsz);
+ ++tape->bttb_ntup;
+ tape->bttb_top += DOUBLEALIGN(itemsz);
}
/*-------------------------------------------------------------------------
@@ -530,41 +554,44 @@ _bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz)
* create and initialize a spool structure, including the underlying
* files.
*/
-void *
+void *
_bt_spoolinit(Relation index, int ntapes, bool isunique)
{
- BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool));
- int i;
- char *fname = (char *) palloc(sizeof(TAPETEMP) + 1);
-
- if (btspool == (BTSpool *) NULL || fname == (char *) NULL) {
- elog(WARN, "_bt_spoolinit: out of memory");
- }
- memset((char *) btspool, 0, sizeof(BTSpool));
- btspool->bts_ntapes = ntapes;
- btspool->bts_tape = 0;
- btspool->isunique = isunique;
-
- btspool->bts_itape =
- (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
- btspool->bts_otape =
- (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
- if (btspool->bts_itape == (BTTapeBlock **) NULL ||
- btspool->bts_otape == (BTTapeBlock **) NULL) {
- elog(WARN, "_bt_spoolinit: out of memory");
- }
-
- for (i = 0; i < ntapes; ++i) {
- btspool->bts_itape[i] =
- _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
- btspool->bts_otape[i] =
- _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
- }
- pfree((void *) fname);
-
- _bt_isortcmpinit(index, btspool);
-
- return((void *) btspool);
+ BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool));
+ int i;
+ char *fname = (char *) palloc(sizeof(TAPETEMP) + 1);
+
+ if (btspool == (BTSpool *) NULL || fname == (char *) NULL)
+ {
+ elog(WARN, "_bt_spoolinit: out of memory");
+ }
+ memset((char *) btspool, 0, sizeof(BTSpool));
+ btspool->bts_ntapes = ntapes;
+ btspool->bts_tape = 0;
+ btspool->isunique = isunique;
+
+ btspool->bts_itape =
+ (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
+ btspool->bts_otape =
+ (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
+ if (btspool->bts_itape == (BTTapeBlock **) NULL ||
+ btspool->bts_otape == (BTTapeBlock **) NULL)
+ {
+ elog(WARN, "_bt_spoolinit: out of memory");
+ }
+
+ for (i = 0; i < ntapes; ++i)
+ {
+ btspool->bts_itape[i] =
+ _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
+ btspool->bts_otape[i] =
+ _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
+ }
+ pfree((void *) fname);
+
+ _bt_isortcmpinit(index, btspool);
+
+ return ((void *) btspool);
}
/*
@@ -573,29 +600,32 @@ _bt_spoolinit(Relation index, int ntapes, bool isunique)
void
_bt_spooldestroy(void *spool)
{
- BTSpool *btspool = (BTSpool *) spool;
- int i;
-
- for (i = 0; i < btspool->bts_ntapes; ++i) {
- _bt_tapedestroy(btspool->bts_otape[i]);
- _bt_tapedestroy(btspool->bts_itape[i]);
- }
- pfree((void *) btspool);
+ BTSpool *btspool = (BTSpool *) spool;
+ int i;
+
+ for (i = 0; i < btspool->bts_ntapes; ++i)
+ {
+ _bt_tapedestroy(btspool->bts_otape[i]);
+ _bt_tapedestroy(btspool->bts_itape[i]);
+ }
+ pfree((void *) btspool);
}
/*
* flush out any dirty output tape blocks
*/
static void
-_bt_spoolflush(BTSpool *btspool)
+_bt_spoolflush(BTSpool * btspool)
{
- int i;
+ int i;
- for (i = 0; i < btspool->bts_ntapes; ++i) {
- if (!EMPTYTAPE(btspool->bts_otape[i])) {
- _bt_tapewrite(btspool->bts_otape[i], 1);
+ for (i = 0; i < btspool->bts_ntapes; ++i)
+ {
+ if (!EMPTYTAPE(btspool->bts_otape[i]))
+ {
+ _bt_tapewrite(btspool->bts_otape[i], 1);
+ }
}
- }
}
/*
@@ -605,36 +635,37 @@ _bt_spoolflush(BTSpool *btspool)
* output tapes.
*/
static void
-_bt_spoolswap(BTSpool *btspool)
+_bt_spoolswap(BTSpool * btspool)
{
- File tmpfd;
- BTTapeBlock *itape;
- BTTapeBlock *otape;
- int i;
+ File tmpfd;
+ BTTapeBlock *itape;
+ BTTapeBlock *otape;
+ int i;
- for (i = 0; i < btspool->bts_ntapes; ++i) {
- itape = btspool->bts_itape[i];
- otape = btspool->bts_otape[i];
+ for (i = 0; i < btspool->bts_ntapes; ++i)
+ {
+ itape = btspool->bts_itape[i];
+ otape = btspool->bts_otape[i];
- /*
- * swap the input and output VFDs.
- */
- tmpfd = itape->bttb_fd;
- itape->bttb_fd = otape->bttb_fd;
- otape->bttb_fd = tmpfd;
+ /*
+ * swap the input and output VFDs.
+ */
+ tmpfd = itape->bttb_fd;
+ itape->bttb_fd = otape->bttb_fd;
+ otape->bttb_fd = tmpfd;
- /*
- * rewind the new input tape.
- */
- _bt_taperewind(itape);
- _bt_tapereset(itape);
+ /*
+ * rewind the new input tape.
+ */
+ _bt_taperewind(itape);
+ _bt_tapereset(itape);
- /*
- * clear the new output tape -- it's ok to throw away the old
- * inputs.
- */
- _bt_tapeclear(otape);
- }
+ /*
+ * clear the new output tape -- it's ok to throw away the old
+ * inputs.
+ */
+ _bt_tapeclear(otape);
+ }
}
/*-------------------------------------------------------------------------
@@ -643,7 +674,7 @@ _bt_spoolswap(BTSpool *btspool)
*/
/*
- * spool 'btitem' into an initial run. as tape blocks are filled, the
+ * spool 'btitem' into an initial run. as tape blocks are filled, the
* block BTItems are qsorted and written into some output tape (it
* doesn't matter which; we go round-robin for simplicity). the
* initial runs are therefore always just one block.
@@ -651,134 +682,137 @@ _bt_spoolswap(BTSpool *btspool)
void
_bt_spool(Relation index, BTItem btitem, void *spool)
{
- BTSpool *btspool = (BTSpool *) spool;
- BTTapeBlock *itape;
- Size itemsz;
-
- _bt_isortcmpinit (index, btspool);
-
- itape = btspool->bts_itape[btspool->bts_tape];
- itemsz = BTITEMSZ(btitem);
- itemsz = DOUBLEALIGN(itemsz);
-
- /*
- * if this buffer is too full for this BTItemData, or if we have
- * run out of BTItems, we need to sort the buffer and write it
- * out. in this case, the BTItemData will go into the next tape's
- * buffer.
- */
- if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) {
- BTSortKey *parray = (BTSortKey *) NULL;
- BTTapeBlock *otape;
- BTItem bti;
- char *pos;
- int btisz;
- int it_ntup = itape->bttb_ntup;
- int i;
+ BTSpool *btspool = (BTSpool *) spool;
+ BTTapeBlock *itape;
+ Size itemsz;
- /*
- * build an array of pointers to the BTItemDatas on the input
- * block.
- */
- if (it_ntup > 0) {
- parray =
- (BTSortKey *) palloc(it_ntup * sizeof(BTSortKey));
- pos = itape->bttb_data;
- for (i = 0; i < it_ntup; ++i) {
- _bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i]));
- }
-
- /*
- * qsort the pointer array.
- */
- qsort((void *) parray, it_ntup, sizeof(BTSortKey),
- (int (*)(const void *,const void *))_bt_isortcmp);
- }
+ _bt_isortcmpinit(index, btspool);
+
+ itape = btspool->bts_itape[btspool->bts_tape];
+ itemsz = BTITEMSZ(btitem);
+ itemsz = DOUBLEALIGN(itemsz);
/*
- * write the spooled run into the output tape. we copy the
- * BTItemDatas in the order dictated by the sorted array of
- * BTItems, not the original order.
- *
- * (since everything was DOUBLEALIGN'd and is all on a single
- * tape block, everything had *better* still fit on one tape
- * block..)
+ * if this buffer is too full for this BTItemData, or if we have run
+ * out of BTItems, we need to sort the buffer and write it out. in
+ * this case, the BTItemData will go into the next tape's buffer.
*/
- otape = btspool->bts_otape[btspool->bts_tape];
- for (i = 0; i < it_ntup; ++i) {
- bti = parray[i].btsk_item;
- btisz = BTITEMSZ(bti);
- btisz = DOUBLEALIGN(btisz);
- _bt_tapeadd(otape, bti, btisz);
+ if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz)
+ {
+ BTSortKey *parray = (BTSortKey *) NULL;
+ BTTapeBlock *otape;
+ BTItem bti;
+ char *pos;
+ int btisz;
+ int it_ntup = itape->bttb_ntup;
+ int i;
+
+ /*
+ * build an array of pointers to the BTItemDatas on the input
+ * block.
+ */
+ if (it_ntup > 0)
+ {
+ parray =
+ (BTSortKey *) palloc(it_ntup * sizeof(BTSortKey));
+ pos = itape->bttb_data;
+ for (i = 0; i < it_ntup; ++i)
+ {
+ _bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i]));
+ }
+
+ /*
+ * qsort the pointer array.
+ */
+ qsort((void *) parray, it_ntup, sizeof(BTSortKey),
+ (int (*) (const void *, const void *)) _bt_isortcmp);
+ }
+
+ /*
+ * write the spooled run into the output tape. we copy the
+ * BTItemDatas in the order dictated by the sorted array of
+ * BTItems, not the original order.
+ *
+ * (since everything was DOUBLEALIGN'd and is all on a single tape
+ * block, everything had *better* still fit on one tape block..)
+ */
+ otape = btspool->bts_otape[btspool->bts_tape];
+ for (i = 0; i < it_ntup; ++i)
+ {
+ bti = parray[i].btsk_item;
+ btisz = BTITEMSZ(bti);
+ btisz = DOUBLEALIGN(btisz);
+ _bt_tapeadd(otape, bti, btisz);
#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_SPOOL)
- {
- bool isnull;
- Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att,
- &isnull);
- printf("_bt_spool: inserted <%x> into output tape %d\n",
- d, btspool->bts_tape);
- }
-#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */
- }
+ {
+ bool isnull;
+ Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att,
+ &isnull);
- /*
- * the initial runs are always single tape blocks. flush the
- * output block, marking End-Of-Run.
- */
- _bt_tapewrite(otape, 1);
+ printf("_bt_spool: inserted <%x> into output tape %d\n",
+ d, btspool->bts_tape);
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */
+ }
- /*
- * reset the input buffer for the next run. we don't have to
- * write it out or anything -- we only use it to hold the
- * unsorted BTItemDatas, the output tape contains all the
- * sorted stuff.
- *
- * changing bts_tape changes the output tape and input tape;
- * we change itape for the code below.
- */
- _bt_tapereset(itape);
- btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
- itape = btspool->bts_itape[btspool->bts_tape];
+ /*
+ * the initial runs are always single tape blocks. flush the
+ * output block, marking End-Of-Run.
+ */
+ _bt_tapewrite(otape, 1);
- /*
- * destroy the pointer array.
- */
- if (parray != (BTSortKey *) NULL)
- {
- for (i = 0; i < it_ntup; i++)
- {
- if ( parray[i].btsk_datum != (Datum*) NULL )
- pfree ((void*)(parray[i].btsk_datum));
- if ( parray[i].btsk_nulls != (char*) NULL )
- pfree ((void*)(parray[i].btsk_nulls));
- }
- pfree((void *) parray);
+ /*
+ * reset the input buffer for the next run. we don't have to
+ * write it out or anything -- we only use it to hold the unsorted
+ * BTItemDatas, the output tape contains all the sorted stuff.
+ *
+ * changing bts_tape changes the output tape and input tape; we
+ * change itape for the code below.
+ */
+ _bt_tapereset(itape);
+ btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
+ itape = btspool->bts_itape[btspool->bts_tape];
+
+ /*
+ * destroy the pointer array.
+ */
+ if (parray != (BTSortKey *) NULL)
+ {
+ for (i = 0; i < it_ntup; i++)
+ {
+ if (parray[i].btsk_datum != (Datum *) NULL)
+ pfree((void *) (parray[i].btsk_datum));
+ if (parray[i].btsk_nulls != (char *) NULL)
+ pfree((void *) (parray[i].btsk_nulls));
+ }
+ pfree((void *) parray);
+ }
}
- }
- /* insert this item into the current buffer */
- if (btitem != (BTItem) NULL) {
- _bt_tapeadd(itape, btitem, itemsz);
- }
+ /* insert this item into the current buffer */
+ if (btitem != (BTItem) NULL)
+ {
+ _bt_tapeadd(itape, btitem, itemsz);
+ }
}
/*
* allocate a new, clean btree page, not linked to any siblings.
*/
static void
-_bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
+_bt_blnewpage(Relation index, Buffer * buf, Page * page, int flags)
{
- BTPageOpaque opaque;
+ BTPageOpaque opaque;
- *buf = _bt_getbuf(index, P_NEW, BT_WRITE);
+ *buf = _bt_getbuf(index, P_NEW, BT_WRITE);
#if 0
- printf("\tblk=%d\n", BufferGetBlockNumber(*buf));
+ printf("\tblk=%d\n", BufferGetBlockNumber(*buf));
#endif
- *page = BufferGetPage(*buf);
- _bt_pageinit(*page, BufferGetPageSize(*buf));
- opaque = (BTPageOpaque) PageGetSpecialPointer(*page);
- opaque->btpo_prev = opaque->btpo_next = P_NONE;
- opaque->btpo_flags = flags;
+ *page = BufferGetPage(*buf);
+ _bt_pageinit(*page, BufferGetPageSize(*buf));
+ opaque = (BTPageOpaque) PageGetSpecialPointer(*page);
+ opaque->btpo_prev = opaque->btpo_next = P_NONE;
+ opaque->btpo_flags = flags;
}
/*
@@ -790,42 +824,44 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
static void
_bt_slideleft(Relation index, Buffer buf, Page page)
{
- OffsetNumber off;
- OffsetNumber maxoff;
- ItemId previi;
- ItemId thisii;
-
- if (!PageIsEmpty(page)) {
- maxoff = PageGetMaxOffsetNumber(page);
- previi = PageGetItemId(page, P_HIKEY);
- for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) {
- thisii = PageGetItemId(page, off);
- *previi = *thisii;
- previi = thisii;
+ OffsetNumber off;
+ OffsetNumber maxoff;
+ ItemId previi;
+ ItemId thisii;
+
+ if (!PageIsEmpty(page))
+ {
+ maxoff = PageGetMaxOffsetNumber(page);
+ previi = PageGetItemId(page, P_HIKEY);
+ for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
+ {
+ thisii = PageGetItemId(page, off);
+ *previi = *thisii;
+ previi = thisii;
+ }
+ ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
}
- ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
- }
}
/*
* allocate and initialize a new BTPageState. the returned structure
* is suitable for immediate use by _bt_buildadd.
*/
-static void *
+static void *
_bt_pagestate(Relation index, int flags, int level, bool doupper)
{
- BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState));
-
- memset((char *) state, 0, sizeof(BTPageState));
- _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags);
- state->btps_firstoff = InvalidOffsetNumber;
- state->btps_lastoff = P_HIKEY;
- state->btps_lastbti = (BTItem) NULL;
- state->btps_next = (BTPageState *) NULL;
- state->btps_level = level;
- state->btps_doupper = doupper;
-
- return((void *) state);
+ BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState));
+
+ memset((char *) state, 0, sizeof(BTPageState));
+ _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags);
+ state->btps_firstoff = InvalidOffsetNumber;
+ state->btps_lastoff = P_HIKEY;
+ state->btps_lastbti = (BTItem) NULL;
+ state->btps_next = (BTPageState *) NULL;
+ state->btps_level = level;
+ state->btps_doupper = doupper;
+
+ return ((void *) state);
}
/*
@@ -834,19 +870,19 @@ _bt_pagestate(Relation index, int flags, int level, bool doupper)
* the page to which the item used to point, e.g., a heap page if
* 'opage' is a leaf page).
*/
-static BTItem
+static BTItem
_bt_minitem(Page opage, BlockNumber oblkno, int atend)
{
- OffsetNumber off;
- BTItem obti;
- BTItem nbti;
+ OffsetNumber off;
+ BTItem obti;
+ BTItem nbti;
- off = atend ? P_HIKEY : P_FIRSTKEY;
- obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off));
- nbti = _bt_formitem(&(obti->bti_itup));
- ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY);
+ off = atend ? P_HIKEY : P_FIRSTKEY;
+ obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off));
+ nbti = _bt_formitem(&(obti->bti_itup));
+ ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY);
- return(nbti);
+ return (nbti);
}
/*
@@ -855,26 +891,26 @@ _bt_minitem(Page opage, BlockNumber oblkno, int atend)
* we must be careful to observe the following restrictions, placed
* upon us by the conventions in nbtsearch.c:
* - rightmost pages start data items at P_HIKEY instead of at
- * P_FIRSTKEY.
+ * P_FIRSTKEY.
* - duplicates cannot be split among pages unless the chain of
- * duplicates starts at the first data item.
+ * duplicates starts at the first data item.
*
* a leaf page being built looks like:
*
* +----------------+---------------------------------+
- * | PageHeaderData | linp0 linp1 linp2 ... |
+ * | PageHeaderData | linp0 linp1 linp2 ... |
* +-----------+----+---------------------------------+
- * | ... linpN | ^ first |
+ * | ... linpN | ^ first |
* +-----------+--------------------------------------+
- * | ^ last |
- * | |
- * | v last |
+ * | ^ last |
+ * | |
+ * | v last |
* +-------------+------------------------------------+
- * | | itemN ... |
+ * | | itemN ... |
* +-------------+------------------+-----------------+
- * | ... item3 item2 item1 | "special space" |
+ * | ... item3 item2 item1 | "special space" |
* +--------------------------------+-----------------+
- * ^ first
+ * ^ first
*
* contrast this with the diagram in bufpage.h; note the mismatch
* between linps and items. this is because we reserve linp0 as a
@@ -888,216 +924,230 @@ _bt_minitem(Page opage, BlockNumber oblkno, int atend)
*
* if all keys are unique, 'first' will always be the same as 'last'.
*/
-static BTItem
+static BTItem
_bt_buildadd(Relation index, void *pstate, BTItem bti, int flags)
{
- BTPageState *state = (BTPageState *) pstate;
- Buffer nbuf;
- Page npage;
- BTItem last_bti;
- OffsetNumber first_off;
- OffsetNumber last_off;
- OffsetNumber off;
- Size pgspc;
- Size btisz;
-
- nbuf = state->btps_buf;
- npage = state->btps_page;
- first_off = state->btps_firstoff;
- last_off = state->btps_lastoff;
- last_bti = state->btps_lastbti;
-
- pgspc = PageGetFreeSpace(npage);
- btisz = BTITEMSZ(bti);
- btisz = DOUBLEALIGN(btisz);
- if (pgspc < btisz) {
- Buffer obuf = nbuf;
- Page opage = npage;
- OffsetNumber o, n;
- ItemId ii;
- ItemId hii;
-
- _bt_blnewpage(index, &nbuf, &npage, flags);
+ BTPageState *state = (BTPageState *) pstate;
+ Buffer nbuf;
+ Page npage;
+ BTItem last_bti;
+ OffsetNumber first_off;
+ OffsetNumber last_off;
+ OffsetNumber off;
+ Size pgspc;
+ Size btisz;
+
+ nbuf = state->btps_buf;
+ npage = state->btps_page;
+ first_off = state->btps_firstoff;
+ last_off = state->btps_lastoff;
+ last_bti = state->btps_lastbti;
+
+ pgspc = PageGetFreeSpace(npage);
+ btisz = BTITEMSZ(bti);
+ btisz = DOUBLEALIGN(btisz);
+ if (pgspc < btisz)
+ {
+ Buffer obuf = nbuf;
+ Page opage = npage;
+ OffsetNumber o,
+ n;
+ ItemId ii;
+ ItemId hii;
- /*
- * if 'last' is part of a chain of duplicates that does not
- * start at the beginning of the old page, the entire chain is
- * copied to the new page; we delete all of the duplicates
- * from the old page except the first, which becomes the high
- * key item of the old page.
- *
- * if the chain starts at the beginning of the page or there
- * is no chain ('first' == 'last'), we need only copy 'last'
- * to the new page. again, 'first' (== 'last') becomes the
- * high key of the old page.
- *
- * note that in either case, we copy at least one item to the
- * new page, so 'last_bti' will always be valid. 'bti' will
- * never be the first data item on the new page.
- */
- if (first_off == P_FIRSTKEY) {
- Assert(last_off != P_FIRSTKEY);
- first_off = last_off;
- }
- for (o = first_off, n = P_FIRSTKEY;
- o <= last_off;
- o = OffsetNumberNext(o), n = OffsetNumberNext(n)) {
- ii = PageGetItemId(opage, o);
- if ( PageAddItem(npage, PageGetItem(opage, ii),
- ii->lp_len, n, LP_USED) == InvalidOffsetNumber )
- elog (FATAL, "btree: failed to add item to the page in _bt_sort (1)");
+ _bt_blnewpage(index, &nbuf, &npage, flags);
+
+ /*
+ * if 'last' is part of a chain of duplicates that does not start
+ * at the beginning of the old page, the entire chain is copied to
+ * the new page; we delete all of the duplicates from the old page
+ * except the first, which becomes the high key item of the old
+ * page.
+ *
+ * if the chain starts at the beginning of the page or there is no
+ * chain ('first' == 'last'), we need only copy 'last' to the new
+ * page. again, 'first' (== 'last') becomes the high key of the
+ * old page.
+ *
+ * note that in either case, we copy at least one item to the new
+ * page, so 'last_bti' will always be valid. 'bti' will never be
+ * the first data item on the new page.
+ */
+ if (first_off == P_FIRSTKEY)
+ {
+ Assert(last_off != P_FIRSTKEY);
+ first_off = last_off;
+ }
+ for (o = first_off, n = P_FIRSTKEY;
+ o <= last_off;
+ o = OffsetNumberNext(o), n = OffsetNumberNext(n))
+ {
+ ii = PageGetItemId(opage, o);
+ if (PageAddItem(npage, PageGetItem(opage, ii),
+ ii->lp_len, n, LP_USED) == InvalidOffsetNumber)
+ elog(FATAL, "btree: failed to add item to the page in _bt_sort (1)");
#if 0
#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
- {
- bool isnull;
- BTItem tmpbti =
- (BTItem) PageGetItem(npage, PageGetItemId(npage, n));
- Datum d = index_getattr(&(tmpbti->bti_itup), 1,
- index->rd_att, &isnull);
- printf("_bt_buildadd: moved <%x> to offset %d at level %d\n",
- d, n, state->btps_level);
- }
-#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+ {
+ bool isnull;
+ BTItem tmpbti =
+ (BTItem) PageGetItem(npage, PageGetItemId(npage, n));
+ Datum d = index_getattr(&(tmpbti->bti_itup), 1,
+ index->rd_att, &isnull);
+
+ printf("_bt_buildadd: moved <%x> to offset %d at level %d\n",
+ d, n, state->btps_level);
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
#endif
- }
- /*
- * this loop is backward because PageIndexTupleDelete shuffles
- * the tuples to fill holes in the page -- by starting at the
- * end and working back, we won't create holes (and thereby
- * avoid shuffling).
- */
- for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) {
- PageIndexTupleDelete(opage, o);
- }
- hii = PageGetItemId(opage, P_HIKEY);
- ii = PageGetItemId(opage, first_off);
- *hii = *ii;
- ii->lp_flags &= ~LP_USED;
- ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
+ }
- first_off = P_FIRSTKEY;
- last_off = PageGetMaxOffsetNumber(npage);
- last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off));
+ /*
+ * this loop is backward because PageIndexTupleDelete shuffles the
+ * tuples to fill holes in the page -- by starting at the end and
+ * working back, we won't create holes (and thereby avoid
+ * shuffling).
+ */
+ for (o = last_off; o > first_off; o = OffsetNumberPrev(o))
+ {
+ PageIndexTupleDelete(opage, o);
+ }
+ hii = PageGetItemId(opage, P_HIKEY);
+ ii = PageGetItemId(opage, first_off);
+ *hii = *ii;
+ ii->lp_flags &= ~LP_USED;
+ ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
- /*
- * set the page (side link) pointers.
- */
- {
- BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
- BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
-
- oopaque->btpo_next = BufferGetBlockNumber(nbuf);
- nopaque->btpo_prev = BufferGetBlockNumber(obuf);
- nopaque->btpo_next = P_NONE;
-
- if ( _bt_itemcmp(index, _bt_nattr,
- (BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)),
- (BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)),
- BTEqualStrategyNumber) )
- oopaque->btpo_flags |= BTP_CHAIN;
- }
+ first_off = P_FIRSTKEY;
+ last_off = PageGetMaxOffsetNumber(npage);
+ last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off));
- /*
- * copy the old buffer's minimum key to its parent. if we
- * don't have a parent, we have to create one; this adds a new
- * btree level.
- */
- if (state->btps_doupper) {
- BTItem nbti;
-
- if (state->btps_next == (BTPageState *) NULL) {
- state->btps_next =
- _bt_pagestate(index, 0, state->btps_level + 1, true);
- }
- nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0);
- _bt_buildadd(index, state->btps_next, nbti, 0);
- pfree((void *) nbti);
+ /*
+ * set the page (side link) pointers.
+ */
+ {
+ BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
+ BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
+
+ oopaque->btpo_next = BufferGetBlockNumber(nbuf);
+ nopaque->btpo_prev = BufferGetBlockNumber(obuf);
+ nopaque->btpo_next = P_NONE;
+
+ if (_bt_itemcmp(index, _bt_nattr,
+ (BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)),
+ (BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)),
+ BTEqualStrategyNumber))
+ oopaque->btpo_flags |= BTP_CHAIN;
+ }
+
+ /*
+ * copy the old buffer's minimum key to its parent. if we don't
+ * have a parent, we have to create one; this adds a new btree
+ * level.
+ */
+ if (state->btps_doupper)
+ {
+ BTItem nbti;
+
+ if (state->btps_next == (BTPageState *) NULL)
+ {
+ state->btps_next =
+ _bt_pagestate(index, 0, state->btps_level + 1, true);
+ }
+ nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0);
+ _bt_buildadd(index, state->btps_next, nbti, 0);
+ pfree((void *) nbti);
+ }
+
+ /*
+ * write out the old stuff. we never want to see it again, so we
+ * can give up our lock (if we had one; BuildingBtree is set, so
+ * we aren't locking).
+ */
+ _bt_wrtbuf(index, obuf);
}
/*
- * write out the old stuff. we never want to see it again, so
- * we can give up our lock (if we had one; BuildingBtree is
- * set, so we aren't locking).
+ * if this item is different from the last item added, we start a new
+ * chain of duplicates.
*/
- _bt_wrtbuf(index, obuf);
- }
-
- /*
- * if this item is different from the last item added, we start a
- * new chain of duplicates.
- */
- off = OffsetNumberNext(last_off);
- if ( PageAddItem(npage, (Item) bti, btisz, off, LP_USED) == InvalidOffsetNumber )
- elog (FATAL, "btree: failed to add item to the page in _bt_sort (2)");
+ off = OffsetNumberNext(last_off);
+ if (PageAddItem(npage, (Item) bti, btisz, off, LP_USED) == InvalidOffsetNumber)
+ elog(FATAL, "btree: failed to add item to the page in _bt_sort (2)");
#if 0
#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
- {
- bool isnull;
- Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull);
- printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n",
- d, off, state->btps_level);
- }
-#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+ {
+ bool isnull;
+ Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull);
+
+ printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n",
+ d, off, state->btps_level);
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
#endif
- if (last_bti == (BTItem) NULL)
- {
- first_off = P_FIRSTKEY;
- }
- else if ( !_bt_itemcmp(index, _bt_nattr,
- bti, last_bti, BTEqualStrategyNumber) )
- {
- first_off = off;
- }
- last_off = off;
- last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off));
-
- state->btps_buf = nbuf;
- state->btps_page = npage;
- state->btps_lastbti = last_bti;
- state->btps_lastoff = last_off;
- state->btps_firstoff = first_off;
-
- return(last_bti);
+ if (last_bti == (BTItem) NULL)
+ {
+ first_off = P_FIRSTKEY;
+ }
+ else if (!_bt_itemcmp(index, _bt_nattr,
+ bti, last_bti, BTEqualStrategyNumber))
+ {
+ first_off = off;
+ }
+ last_off = off;
+ last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off));
+
+ state->btps_buf = nbuf;
+ state->btps_page = npage;
+ state->btps_lastbti = last_bti;
+ state->btps_lastoff = last_off;
+ state->btps_firstoff = first_off;
+
+ return (last_bti);
}
static void
-_bt_uppershutdown(Relation index, BTPageState *state)
+_bt_uppershutdown(Relation index, BTPageState * state)
{
- BTPageState *s;
- BlockNumber blkno;
- BTPageOpaque opaque;
- BTItem bti;
+ BTPageState *s;
+ BlockNumber blkno;
+ BTPageOpaque opaque;
+ BTItem bti;
- for (s = state; s != (BTPageState *) NULL; s = s->btps_next) {
- blkno = BufferGetBlockNumber(s->btps_buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
+ for (s = state; s != (BTPageState *) NULL; s = s->btps_next)
+ {
+ blkno = BufferGetBlockNumber(s->btps_buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
- /*
- * if this is the root, attach it to the metapage. otherwise,
- * stick the minimum key of the last page on this level (which
- * has not been split, or else it wouldn't be the last page)
- * into its parent. this may cause the last page of upper
- * levels to split, but that's not a problem -- we haven't
- * gotten to them yet.
- */
- if (s->btps_doupper) {
- if (s->btps_next == (BTPageState *) NULL) {
- opaque->btpo_flags |= BTP_ROOT;
- _bt_metaproot(index, blkno, s->btps_level + 1);
- } else {
- bti = _bt_minitem(s->btps_page, blkno, 0);
- _bt_buildadd(index, s->btps_next, bti, 0);
- pfree((void *) bti);
- }
- }
+ /*
+ * if this is the root, attach it to the metapage. otherwise,
+ * stick the minimum key of the last page on this level (which has
+ * not been split, or else it wouldn't be the last page) into its
+ * parent. this may cause the last page of upper levels to split,
+ * but that's not a problem -- we haven't gotten to them yet.
+ */
+ if (s->btps_doupper)
+ {
+ if (s->btps_next == (BTPageState *) NULL)
+ {
+ opaque->btpo_flags |= BTP_ROOT;
+ _bt_metaproot(index, blkno, s->btps_level + 1);
+ }
+ else
+ {
+ bti = _bt_minitem(s->btps_page, blkno, 0);
+ _bt_buildadd(index, s->btps_next, bti, 0);
+ pfree((void *) bti);
+ }
+ }
- /*
- * this is the rightmost page, so the ItemId array needs to be
- * slid back one slot.
- */
- _bt_slideleft(index, s->btps_buf, s->btps_page);
- _bt_wrtbuf(index, s->btps_buf);
- }
+ /*
+ * this is the rightmost page, so the ItemId array needs to be
+ * slid back one slot.
+ */
+ _bt_slideleft(index, s->btps_buf, s->btps_page);
+ _bt_wrtbuf(index, s->btps_buf);
+ }
}
/*
@@ -1105,203 +1155,230 @@ _bt_uppershutdown(Relation index, BTPageState *state)
* merging passes until at most one run is left in each tape. at that
* point, merge the final tape runs into a set of btree leaves.
*
- * XXX three nested loops? gross. cut me up into smaller routines.
+ * XXX three nested loops? gross. cut me up into smaller routines.
*/
static void
-_bt_merge(Relation index, BTSpool *btspool)
+_bt_merge(Relation index, BTSpool * btspool)
{
- BTPageState *state;
- BTPriQueue q;
- BTPriQueueElem e;
- BTSortKey btsk;
- BTItem bti;
- BTTapeBlock *itape;
- BTTapeBlock *otape;
- char *tapepos[MAXTAPES];
- int tapedone[MAXTAPES];
- int t;
- int goodtapes;
- int npass;
- int nruns;
- Size btisz;
- bool doleaf = false;
-
- /*
- * initialize state needed for the merge into the btree leaf pages.
- */
- state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true);
-
- npass = 0;
- do { /* pass */
+ BTPageState *state;
+ BTPriQueue q;
+ BTPriQueueElem e;
+ BTSortKey btsk;
+ BTItem bti;
+ BTTapeBlock *itape;
+ BTTapeBlock *otape;
+ char *tapepos[MAXTAPES];
+ int tapedone[MAXTAPES];
+ int t;
+ int goodtapes;
+ int npass;
+ int nruns;
+ Size btisz;
+ bool doleaf = false;
+
/*
- * each pass starts by flushing the previous outputs and
- * swapping inputs and outputs. flushing sets End-of-Run for
- * any dirty output tapes. swapping clears the new output
- * tapes and rewinds the new input tapes.
+ * initialize state needed for the merge into the btree leaf pages.
*/
- btspool->bts_tape = btspool->bts_ntapes - 1;
- _bt_spoolflush(btspool);
- _bt_spoolswap(btspool);
-
- ++npass;
- nruns = 0;
-
- for (;;) { /* run */
- /*
- * each run starts by selecting a new output tape. the
- * merged results of a given run are always sent to this
- * one tape.
- */
- btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
- otape = btspool->bts_otape[btspool->bts_tape];
-
- /*
- * initialize the priority queue by loading it with the
- * first element of the given run in each tape. since we
- * are starting a new run, we reset the tape (clearing the
- * End-Of-Run marker) before reading it. this means that
- * _bt_taperead will return 0 only if the tape is actually
- * at EOF.
- */
- memset((char *) &q, 0, sizeof(BTPriQueue));
- goodtapes = 0;
- for (t = 0; t < btspool->bts_ntapes; ++t) {
- itape = btspool->bts_itape[t];
- tapepos[t] = itape->bttb_data;
- tapedone[t] = 0;
- _bt_tapereset(itape);
- do {
- if (_bt_taperead(itape) == 0) {
- tapedone[t] = 1;
- }
- } while (!tapedone[t] && EMPTYTAPE(itape));
- if (!tapedone[t]) {
- ++goodtapes;
- e.btpqe_tape = t;
- _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]),
- &(e.btpqe_item));
- if (e.btpqe_item.btsk_item != (BTItem) NULL) {
- _bt_pqadd(&q, &e);
- }
- }
- }
- /*
- * if we don't have any tapes with any input (i.e., they
- * are all at EOF), there is no work to do in this run --
- * we must be done with this pass.
- */
- if (goodtapes == 0) {
- break; /* for */
- }
- ++nruns;
-
- /*
- * output the smallest element from the queue until there
- * are no more.
- */
- while (_bt_pqnext(&q, &e) >= 0) { /* item */
+ state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true);
+
+ npass = 0;
+ do
+ { /* pass */
+
/*
- * replace the element taken from priority queue,
- * fetching a new block if needed. a tape can run out
- * if it hits either End-Of-Run or EOF.
+ * each pass starts by flushing the previous outputs and swapping
+ * inputs and outputs. flushing sets End-of-Run for any dirty
+ * output tapes. swapping clears the new output tapes and rewinds
+ * the new input tapes.
*/
- t = e.btpqe_tape;
- btsk = e.btpqe_item;
- bti = btsk.btsk_item;
- if (bti != (BTItem) NULL) {
- btisz = BTITEMSZ(bti);
- btisz = DOUBLEALIGN(btisz);
- if (doleaf) {
- _bt_buildadd(index, state, bti, BTP_LEAF);
-#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
+ btspool->bts_tape = btspool->bts_ntapes - 1;
+ _bt_spoolflush(btspool);
+ _bt_spoolswap(btspool);
+
+ ++npass;
+ nruns = 0;
+
+ for (;;)
+ { /* run */
+
+ /*
+ * each run starts by selecting a new output tape. the merged
+ * results of a given run are always sent to this one tape.
+ */
+ btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
+ otape = btspool->bts_otape[btspool->bts_tape];
+
+ /*
+ * initialize the priority queue by loading it with the first
+ * element of the given run in each tape. since we are
+ * starting a new run, we reset the tape (clearing the
+ * End-Of-Run marker) before reading it. this means that
+ * _bt_taperead will return 0 only if the tape is actually at
+ * EOF.
+ */
+ memset((char *) &q, 0, sizeof(BTPriQueue));
+ goodtapes = 0;
+ for (t = 0; t < btspool->bts_ntapes; ++t)
{
- bool isnull;
- Datum d = index_getattr(&(bti->bti_itup), 1,
- index->rd_att, &isnull);
- printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n",
- npass, nruns, d, t,
- BufferGetBlockNumber(state->btps_buf));
+ itape = btspool->bts_itape[t];
+ tapepos[t] = itape->bttb_data;
+ tapedone[t] = 0;
+ _bt_tapereset(itape);
+ do
+ {
+ if (_bt_taperead(itape) == 0)
+ {
+ tapedone[t] = 1;
+ }
+ } while (!tapedone[t] && EMPTYTAPE(itape));
+ if (!tapedone[t])
+ {
+ ++goodtapes;
+ e.btpqe_tape = t;
+ _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]),
+ &(e.btpqe_item));
+ if (e.btpqe_item.btsk_item != (BTItem) NULL)
+ {
+ _bt_pqadd(&q, &e);
+ }
+ }
}
-#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
- } else {
- if (SPCLEFT(otape) < btisz) {
- /*
- * if it's full, write it out and add the
- * item to the next block. (since we will
- * be adding another tuple immediately
- * after this, we can be sure that there
- * will be at least one more block in this
- * run and so we know we do *not* want to
- * set End-Of-Run here.)
- */
- _bt_tapewrite(otape, 0);
- }
- _bt_tapeadd(otape, bti, btisz);
-#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
+
+ /*
+ * if we don't have any tapes with any input (i.e., they are
+ * all at EOF), there is no work to do in this run -- we must
+ * be done with this pass.
+ */
+ if (goodtapes == 0)
{
- bool isnull;
- Datum d = index_getattr(&(bti->bti_itup), 1,
- index->rd_att, &isnull);
- printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n",
- npass, nruns, d, t,
- btspool->bts_tape);
- }
-#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
- }
-
- if ( btsk.btsk_datum != (Datum*) NULL )
- pfree ((void*)(btsk.btsk_datum));
- if ( btsk.btsk_nulls != (char*) NULL )
- pfree ((void*)(btsk.btsk_nulls));
-
- }
- itape = btspool->bts_itape[t];
- if (!tapedone[t]) {
- BTItem newbti = _bt_tapenext(itape, &tapepos[t]);
-
- if (newbti == (BTItem) NULL) {
- do {
- if (_bt_taperead(itape) == 0) {
- tapedone[t] = 1;
- }
- } while (!tapedone[t] && EMPTYTAPE(itape));
- if (!tapedone[t]) {
- tapepos[t] = itape->bttb_data;
- newbti = _bt_tapenext(itape, &tapepos[t]);
+ break; /* for */
}
- }
- if (newbti != (BTItem) NULL) {
- BTPriQueueElem nexte;
-
- nexte.btpqe_tape = t;
- _bt_setsortkey(index, newbti, &(nexte.btpqe_item));
- _bt_pqadd(&q, &nexte);
- }
+ ++nruns;
+
+ /*
+ * output the smallest element from the queue until there are
+ * no more.
+ */
+ while (_bt_pqnext(&q, &e) >= 0)
+ { /* item */
+
+ /*
+ * replace the element taken from priority queue, fetching
+ * a new block if needed. a tape can run out if it hits
+ * either End-Of-Run or EOF.
+ */
+ t = e.btpqe_tape;
+ btsk = e.btpqe_item;
+ bti = btsk.btsk_item;
+ if (bti != (BTItem) NULL)
+ {
+ btisz = BTITEMSZ(bti);
+ btisz = DOUBLEALIGN(btisz);
+ if (doleaf)
+ {
+ _bt_buildadd(index, state, bti, BTP_LEAF);
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
+ {
+ bool isnull;
+ Datum d = index_getattr(&(bti->bti_itup), 1,
+ index->rd_att, &isnull);
+
+ printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n",
+ npass, nruns, d, t,
+ BufferGetBlockNumber(state->btps_buf));
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+ }
+ else
+ {
+ if (SPCLEFT(otape) < btisz)
+ {
+
+ /*
+ * if it's full, write it out and add the item
+ * to the next block. (since we will be
+ * adding another tuple immediately after
+ * this, we can be sure that there will be at
+ * least one more block in this run and so we
+ * know we do *not* want to set End-Of-Run
+ * here.)
+ */
+ _bt_tapewrite(otape, 0);
+ }
+ _bt_tapeadd(otape, bti, btisz);
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
+ {
+ bool isnull;
+ Datum d = index_getattr(&(bti->bti_itup), 1,
+ index->rd_att, &isnull);
+
+ printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n",
+ npass, nruns, d, t,
+ btspool->bts_tape);
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+ }
+
+ if (btsk.btsk_datum != (Datum *) NULL)
+ pfree((void *) (btsk.btsk_datum));
+ if (btsk.btsk_nulls != (char *) NULL)
+ pfree((void *) (btsk.btsk_nulls));
+
+ }
+ itape = btspool->bts_itape[t];
+ if (!tapedone[t])
+ {
+ BTItem newbti = _bt_tapenext(itape, &tapepos[t]);
+
+ if (newbti == (BTItem) NULL)
+ {
+ do
+ {
+ if (_bt_taperead(itape) == 0)
+ {
+ tapedone[t] = 1;
+ }
+ } while (!tapedone[t] && EMPTYTAPE(itape));
+ if (!tapedone[t])
+ {
+ tapepos[t] = itape->bttb_data;
+ newbti = _bt_tapenext(itape, &tapepos[t]);
+ }
+ }
+ if (newbti != (BTItem) NULL)
+ {
+ BTPriQueueElem nexte;
+
+ nexte.btpqe_tape = t;
+ _bt_setsortkey(index, newbti, &(nexte.btpqe_item));
+ _bt_pqadd(&q, &nexte);
+ }
+ }
+ } /* item */
+
+ /*
+ * that's it for this run. flush the output tape, marking
+ * End-of-Run.
+ */
+ _bt_tapewrite(otape, 1);
+ } /* run */
+
+ /*
+ * we are here because we ran out of input on all of the input
+ * tapes.
+ *
+ * if this pass did not generate more actual output runs than we have
+ * tapes, we know we have at most one run in each tape. this
+ * means that we are ready to merge into the final btree leaf
+ * pages instead of merging into a tape file.
+ */
+ if (nruns <= btspool->bts_ntapes)
+ {
+ doleaf = true;
}
- } /* item */
-
- /*
- * that's it for this run. flush the output tape, marking
- * End-of-Run.
- */
- _bt_tapewrite(otape, 1);
- } /* run */
-
- /*
- * we are here because we ran out of input on all of the input
- * tapes.
- *
- * if this pass did not generate more actual output runs than
- * we have tapes, we know we have at most one run in each
- * tape. this means that we are ready to merge into the final
- * btree leaf pages instead of merging into a tape file.
- */
- if (nruns <= btspool->bts_ntapes) {
- doleaf = true;
- }
- } while (nruns > 0); /* pass */
+ } while (nruns > 0); /* pass */
- _bt_uppershutdown(index, state);
+ _bt_uppershutdown(index, state);
}
@@ -1320,62 +1397,65 @@ _bt_merge(Relation index, BTSpool *btspool)
void
_bt_upperbuild(Relation index)
{
- Buffer rbuf;
- BlockNumber blk;
- Page rpage;
- BTPageOpaque ropaque;
- BTPageState *state;
- BTItem nbti;
-
- /*
- * find the first leaf block. while we're at it, clear the
- * BTP_ROOT flag that we set while building it (so we could find
- * it later).
- */
- rbuf = _bt_getroot(index, BT_WRITE);
- blk = BufferGetBlockNumber(rbuf);
- rpage = BufferGetPage(rbuf);
- ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
- ropaque->btpo_flags &= ~BTP_ROOT;
- _bt_wrtbuf(index, rbuf);
-
- state = (BTPageState *) _bt_pagestate(index, 0, 0, true);
-
- /* for each page... */
- do {
-#if 0
- printf("\t\tblk=%d\n", blk);
-#endif
- rbuf = _bt_getbuf(index, blk, BT_READ);
+ Buffer rbuf;
+ BlockNumber blk;
+ Page rpage;
+ BTPageOpaque ropaque;
+ BTPageState *state;
+ BTItem nbti;
+
+ /*
+ * find the first leaf block. while we're at it, clear the BTP_ROOT
+ * flag that we set while building it (so we could find it later).
+ */
+ rbuf = _bt_getroot(index, BT_WRITE);
+ blk = BufferGetBlockNumber(rbuf);
rpage = BufferGetPage(rbuf);
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
-
- /* for each item... */
- if (!PageIsEmpty(rpage)) {
- /*
- * form a new index tuple corresponding to the minimum key
- * of the lower page and insert it into a page at this
- * level.
- */
- nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque));
+ ropaque->btpo_flags &= ~BTP_ROOT;
+ _bt_wrtbuf(index, rbuf);
+
+ state = (BTPageState *) _bt_pagestate(index, 0, 0, true);
+
+ /* for each page... */
+ do
+ {
+#if 0
+ printf("\t\tblk=%d\n", blk);
+#endif
+ rbuf = _bt_getbuf(index, blk, BT_READ);
+ rpage = BufferGetPage(rbuf);
+ ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
+
+ /* for each item... */
+ if (!PageIsEmpty(rpage))
+ {
+
+ /*
+ * form a new index tuple corresponding to the minimum key of
+ * the lower page and insert it into a page at this level.
+ */
+ nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque));
#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
- {
- bool isnull;
- Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att,
- &isnull);
- printf("_bt_upperbuild: inserting <%x> at %d\n",
- d, state->btps_level);
- }
-#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
- _bt_buildadd(index, state, nbti, 0);
- pfree((void *) nbti);
- }
- blk = ropaque->btpo_next;
- _bt_relbuf(index, rbuf, BT_READ);
- } while (blk != P_NONE);
-
- _bt_uppershutdown(index, state);
+ {
+ bool isnull;
+ Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att,
+ &isnull);
+
+ printf("_bt_upperbuild: inserting <%x> at %d\n",
+ d, state->btps_level);
+ }
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+ _bt_buildadd(index, state, nbti, 0);
+ pfree((void *) nbti);
+ }
+ blk = ropaque->btpo_next;
+ _bt_relbuf(index, rbuf, BT_READ);
+ } while (blk != P_NONE);
+
+ _bt_uppershutdown(index, state);
}
+
#endif
/*
@@ -1385,17 +1465,17 @@ _bt_upperbuild(Relation index)
void
_bt_leafbuild(Relation index, void *spool)
{
- _bt_isortcmpinit (index, (BTSpool *) spool);
+ _bt_isortcmpinit(index, (BTSpool *) spool);
#ifdef BTREE_BUILD_STATS
- if ( ShowExecutorStats )
- {
- fprintf(stderr, "! BtreeBuild (Spool) Stats:\n");
- ShowUsage ();
- ResetUsage ();
- }
+ if (ShowExecutorStats)
+ {
+ fprintf(stderr, "! BtreeBuild (Spool) Stats:\n");
+ ShowUsage();
+ ResetUsage();
+ }
#endif
- _bt_merge(index, (BTSpool *) spool);
+ _bt_merge(index, (BTSpool *) spool);
}