Diffstat (limited to 'src/backend/access/hash/hashovfl.c')
-rw-r--r--   src/backend/access/hash/hashovfl.c   122
1 file changed, 69 insertions(+), 53 deletions(-)
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 3ab5e1edeb..7b9b585eef 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.66 2009/01/01 17:23:35 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.67 2009/11/01 21:25:25 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -582,18 +582,15 @@ _hash_squeezebucket(Relation rel,
 					BlockNumber bucket_blkno,
 					BufferAccessStrategy bstrategy)
 {
-	Buffer		wbuf;
-	Buffer		rbuf = 0;
 	BlockNumber wblkno;
 	BlockNumber rblkno;
+	Buffer		wbuf;
+	Buffer		rbuf;
 	Page		wpage;
 	Page		rpage;
 	HashPageOpaque wopaque;
 	HashPageOpaque ropaque;
-	OffsetNumber woffnum;
-	OffsetNumber roffnum;
-	IndexTuple	itup;
-	Size		itemsz;
+	bool		wbuf_dirty;
 
 	/*
 	 * start squeezing into the base bucket page.
@@ -622,11 +619,12 @@ _hash_squeezebucket(Relation rel,
 	 * usually smaller than the buffer ring being used by VACUUM, else using
 	 * the access strategy here would be counterproductive.
	 */
+	rbuf = InvalidBuffer;
 	ropaque = wopaque;
 	do
 	{
 		rblkno = ropaque->hasho_nextblkno;
-		if (ropaque != wopaque)
+		if (rbuf != InvalidBuffer)
 			_hash_relbuf(rel, rbuf);
 		rbuf = _hash_getbuf_with_strategy(rel,
 										  rblkno,
@@ -641,12 +639,23 @@ _hash_squeezebucket(Relation rel,
 	/*
 	 * squeeze the tuples.
 	 */
-	roffnum = FirstOffsetNumber;
+	wbuf_dirty = false;
 	for (;;)
 	{
-		/* this test is needed in case page is empty on entry */
-		if (roffnum <= PageGetMaxOffsetNumber(rpage))
+		OffsetNumber roffnum;
+		OffsetNumber maxroffnum;
+		OffsetNumber deletable[MaxOffsetNumber];
+		int			ndeletable = 0;
+
+		/* Scan each tuple in "read" page */
+		maxroffnum = PageGetMaxOffsetNumber(rpage);
+		for (roffnum = FirstOffsetNumber;
+			 roffnum <= maxroffnum;
+			 roffnum = OffsetNumberNext(roffnum))
 		{
+			IndexTuple	itup;
+			Size		itemsz;
+
 			itup = (IndexTuple) PageGetItem(rpage,
 											PageGetItemId(rpage, roffnum));
 			itemsz = IndexTupleDSize(*itup);
@@ -663,12 +672,22 @@ _hash_squeezebucket(Relation rel,
 				wblkno = wopaque->hasho_nextblkno;
 				Assert(BlockNumberIsValid(wblkno));
 
-				_hash_wrtbuf(rel, wbuf);
+				if (wbuf_dirty)
+					_hash_wrtbuf(rel, wbuf);
+				else
+					_hash_relbuf(rel, wbuf);
 
+				/* nothing more to do if we reached the read page */
 				if (rblkno == wblkno)
 				{
-					/* wbuf is already released */
-					_hash_wrtbuf(rel, rbuf);
+					if (ndeletable > 0)
+					{
+						/* Delete tuples we already moved off read page */
+						PageIndexMultiDelete(rpage, deletable, ndeletable);
+						_hash_wrtbuf(rel, rbuf);
+					}
+					else
+						_hash_relbuf(rel, rbuf);
 					return;
 				}
 
@@ -680,28 +699,27 @@ _hash_squeezebucket(Relation rel,
 				wpage = BufferGetPage(wbuf);
 				wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 				Assert(wopaque->hasho_bucket == bucket);
+				wbuf_dirty = false;
 			}
 
 			/*
-			 * we have found room so insert on the "write" page.
+			 * we have found room so insert on the "write" page, being careful
+			 * to preserve hashkey ordering.  (If we insert many tuples into
+			 * the same "write" page it would be worth qsort'ing instead of
+			 * doing repeated _hash_pgaddtup.)
 			 */
-			woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
-			if (PageAddItem(wpage, (Item) itup, itemsz, woffnum, false, false)
-				== InvalidOffsetNumber)
-				elog(ERROR, "failed to add index item to \"%s\"",
-					 RelationGetRelationName(rel));
+			(void) _hash_pgaddtup(rel, wbuf, itemsz, itup);
+			wbuf_dirty = true;
 
-			/*
-			 * delete the tuple from the "read" page.  PageIndexTupleDelete
-			 * repacks the ItemId array, so 'roffnum' will be "advanced" to
-			 * the "next" ItemId.
-			 */
-			PageIndexTupleDelete(rpage, roffnum);
+			/* remember tuple for deletion from "read" page */
+			deletable[ndeletable++] = roffnum;
 		}
 
 		/*
-		 * if the "read" page is now empty because of the deletion (or because
-		 * it was empty when we got to it), free it.
+		 * If we reach here, there are no live tuples on the "read" page ---
+		 * it was empty when we got to it, or we moved them all.  So we
+		 * can just free the page without bothering with deleting tuples
+		 * individually.  Then advance to the previous "read" page.
 		 *
 		 * Tricky point here: if our read and write pages are adjacent in the
 		 * bucket chain, our write lock on wbuf will conflict with
@@ -709,36 +727,34 @@ _hash_squeezebucket(Relation rel,
 		 * removed page.  However, in that case we are done anyway, so we can
 		 * simply drop the write lock before calling _hash_freeovflpage.
 		 */
-		if (PageIsEmpty(rpage))
-		{
-			rblkno = ropaque->hasho_prevblkno;
-			Assert(BlockNumberIsValid(rblkno));
+		rblkno = ropaque->hasho_prevblkno;
+		Assert(BlockNumberIsValid(rblkno));
 
-			/* are we freeing the page adjacent to wbuf? */
-			if (rblkno == wblkno)
-			{
-				/* yes, so release wbuf lock first */
+		/* are we freeing the page adjacent to wbuf? */
+		if (rblkno == wblkno)
+		{
+			/* yes, so release wbuf lock first */
+			if (wbuf_dirty)
 				_hash_wrtbuf(rel, wbuf);
-				/* free this overflow page (releases rbuf) */
-				_hash_freeovflpage(rel, rbuf, bstrategy);
-				/* done */
-				return;
-			}
-
-			/* free this overflow page, then get the previous one */
+			else
+				_hash_relbuf(rel, wbuf);
+			/* free this overflow page (releases rbuf) */
 			_hash_freeovflpage(rel, rbuf, bstrategy);
+			/* done */
+			return;
+		}
 
-			rbuf = _hash_getbuf_with_strategy(rel,
-											  rblkno,
-											  HASH_WRITE,
-											  LH_OVERFLOW_PAGE,
-											  bstrategy);
-			rpage = BufferGetPage(rbuf);
-			ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
-			Assert(ropaque->hasho_bucket == bucket);
+		/* free this overflow page, then get the previous one */
+		_hash_freeovflpage(rel, rbuf, bstrategy);
 
-			roffnum = FirstOffsetNumber;
-		}
+		rbuf = _hash_getbuf_with_strategy(rel,
+										  rblkno,
+										  HASH_WRITE,
+										  LH_OVERFLOW_PAGE,
+										  bstrategy);
+		rpage = BufferGetPage(rbuf);
+		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
+		Assert(ropaque->hasho_bucket == bucket);
 	}
 
 	/* NOTREACHED */
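The core bookkeeping change in this patch: instead of deleting each moved tuple from the "read" page immediately with PageIndexTupleDelete() (which repacks the page's line-pointer array on every call), the new loop records each victim offset in a local deletable[] array and repacks once with PageIndexMultiDelete(); similarly, the wbuf_dirty flag lets untouched buffers be released with _hash_relbuf() instead of being written back with _hash_wrtbuf(). The standalone C sketch below illustrates the batched-deletion half of that idea on a toy array-based "page". It is not PostgreSQL code: ToyPage, toy_delete_one, and toy_delete_many are invented names standing in for a page, PageIndexTupleDelete, and PageIndexMultiDelete.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PAGE_CAPACITY 8

/* A toy "page": a packed array standing in for a page's line-pointer array. */
typedef struct
{
	int		items[PAGE_CAPACITY];
	int		nitems;
} ToyPage;

/*
 * Delete one item and repack immediately, as PageIndexTupleDelete does.
 * After the call, the item that followed 'idx' now lives at 'idx', which is
 * why the pre-patch loop could keep reusing the same roffnum (the deleted
 * comment's "'roffnum' will be advanced to the next ItemId").
 */
static void
toy_delete_one(ToyPage *page, int idx)
{
	memmove(&page->items[idx], &page->items[idx + 1],
			(page->nitems - idx - 1) * sizeof(int));
	page->nitems--;
}

/*
 * Delete a set of items in one compaction pass, the way the patch's single
 * PageIndexMultiDelete call repacks the page once for all moved tuples.
 */
static void
toy_delete_many(ToyPage *page, const int *deletable, int ndeletable)
{
	bool	doomed[PAGE_CAPACITY] = {false};
	int		dst = 0;

	for (int i = 0; i < ndeletable; i++)
		doomed[deletable[i]] = true;
	for (int src = 0; src < page->nitems; src++)
		if (!doomed[src])
			page->items[dst++] = page->items[src];
	page->nitems = dst;
}

int
main(void)
{
	ToyPage	a = {{10, 11, 12, 13, 14, 15}, 6};
	ToyPage	b = a;
	int		deletable[PAGE_CAPACITY];
	int		ndeletable = 0;

	/* Pre-patch style: delete even items one by one, repacking each time. */
	for (int off = 0; off < a.nitems;)
	{
		if (a.items[off] % 2 == 0)		/* pretend this tuple was moved */
			toy_delete_one(&a, off);	/* do NOT advance off: items shifted */
		else
			off++;
	}

	/* Patched style: scan once, remember offsets, repack once at the end. */
	for (int off = 0; off < b.nitems; off++)
		if (b.items[off] % 2 == 0)
			deletable[ndeletable++] = off;
	toy_delete_many(&b, deletable, ndeletable);

	assert(a.nitems == b.nitems &&
		   memcmp(a.items, b.items, a.nitems * sizeof(int)) == 0);
	for (int i = 0; i < a.nitems; i++)
		printf("%d ", a.items[i]);		/* prints: 11 13 15 */
	printf("\n");
	return 0;
}

Both strategies leave the same items behind, but the batched version moves each surviving item at most once, while per-item deletion reshuffles the tail of the array on every call; and because the page is repacked in one step, the buffer needs to be dirtied and written out only once, which is the overhead this commit removes.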
