Diffstat (limited to 'src/backend/access/hash/hash.c')
| -rw-r--r-- | src/backend/access/hash/hash.c | 178 |
1 file changed, 158 insertions, 20 deletions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fbdf0dc04c..0d2f8b6199 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.65 2003/08/04 02:39:57 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.66 2003/09/02 02:18:38 tgl Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -449,40 +449,178 @@ hashbulkdelete(PG_FUNCTION_ARGS)
     BlockNumber num_pages;
     double      tuples_removed;
     double      num_index_tuples;
-    IndexScanDesc iscan;
+    uint32      deleted_tuples;
+    uint32      tuples_remaining;
+    uint32      orig_ntuples;
+    Bucket      orig_maxbucket;
+    Bucket      cur_maxbucket;
+    Bucket      cur_bucket;
+    Buffer      metabuf;
+    HashMetaPage metap;
+    HashMetaPageData local_metapage;
 
+    /*
+     * keep track of counts in both float form (to return) and integer form
+     * (to update hashm_ntuples).  It'd be better to make hashm_ntuples a
+     * double, but that will have to wait for an initdb.
+     */
     tuples_removed = 0;
     num_index_tuples = 0;
+    deleted_tuples = 0;
+    tuples_remaining = 0;
 
     /*
-     * XXX generic implementation --- should be improved!
+     * Read the metapage to fetch original bucket and tuple counts.  Also,
+     * we keep a copy of the last-seen metapage so that we can use its
+     * hashm_spares[] values to compute bucket page addresses.  This is a
+     * bit hokey but perfectly safe, since the interesting entries in the
+     * spares array cannot change under us; and it beats rereading the
+     * metapage for each bucket.
      */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    orig_maxbucket = metap->hashm_maxbucket;
+    orig_ntuples = metap->hashm_ntuples;
+    memcpy(&local_metapage, metap, sizeof(local_metapage));
+    _hash_relbuf(rel, metabuf, HASH_READ);
+
+    /* Scan the buckets that we know exist */
+    cur_bucket = 0;
+    cur_maxbucket = orig_maxbucket;
+
+loop_top:
+    while (cur_bucket <= cur_maxbucket)
+    {
+        BlockNumber bucket_blkno;
+        BlockNumber blkno;
+        bool        bucket_dirty = false;
 
-    /* walk through the entire index */
-    iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL);
-    /* including killed tuples */
-    iscan->ignore_killed_tuples = false;
+        /* Get address of bucket's start page */
+        bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);
 
-    while (index_getnext_indexitem(iscan, ForwardScanDirection))
-    {
-        if (callback(&iscan->xs_ctup.t_self, callback_state))
+        /* XXX lock bucket here */
+
+        /* Scan each page in bucket */
+        blkno = bucket_blkno;
+        while (BlockNumberIsValid(blkno))
         {
-            ItemPointerData indextup = iscan->currentItemData;
+            Buffer      buf;
+            Page        page;
+            HashPageOpaque opaque;
+            OffsetNumber offno;
+            OffsetNumber maxoffno;
+            bool        page_dirty = false;
+
+            buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+            page = BufferGetPage(buf);
+            _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+            opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+            Assert(opaque->hasho_bucket == cur_bucket);
+
+            /* Scan each tuple in page */
+            offno = FirstOffsetNumber;
+            maxoffno = PageGetMaxOffsetNumber(page);
+            while (offno <= maxoffno)
+            {
+                HashItem    hitem;
+                ItemPointer htup;
+
+                hitem = (HashItem) PageGetItem(page,
+                                               PageGetItemId(page, offno));
+                htup = &(hitem->hash_itup.t_tid);
+                if (callback(htup, callback_state))
+                {
+                    ItemPointerData indextup;
+
+                    /* adjust any active scans that will be affected */
+                    /* (this should be unnecessary) */
+                    ItemPointerSet(&indextup, blkno, offno);
+                    _hash_adjscans(rel, &indextup);
+
+                    /* delete the item from the page */
+                    PageIndexTupleDelete(page, offno);
+                    bucket_dirty = page_dirty = true;
+
+                    /* don't increment offno, instead decrement maxoffno */
+                    maxoffno = OffsetNumberPrev(maxoffno);
+
+                    tuples_removed += 1;
+                    deleted_tuples += 1;
+                }
+                else
+                {
+                    offno = OffsetNumberNext(offno);
+
+                    num_index_tuples += 1;
+                    tuples_remaining += 1;
+                }
+            }
 
-            /* adjust any active scans that will be affected by deletion */
-            /* (namely, my own scan) */
-            _hash_adjscans(rel, &indextup);
+            /*
+             * Write or free page if needed, advance to next page. We want
+             * to preserve the invariant that overflow pages are nonempty.
+             */
+            blkno = opaque->hasho_nextblkno;
+
+            if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
+                _hash_freeovflpage(rel, buf);
+            else if (page_dirty)
+                _hash_wrtbuf(rel, buf);
+            else
+                _hash_relbuf(rel, buf, HASH_WRITE);
+        }
 
-            /* delete the data from the page */
-            _hash_pagedel(rel, &indextup);
+        /* If we deleted anything, try to compact free space */
+        if (bucket_dirty)
+            _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
 
-            tuples_removed += 1;
-        }
+        /* XXX unlock bucket here */
+
+        /* Advance to next bucket */
+        cur_bucket++;
+    }
+
+    /* Write-lock metapage and check for split since we started */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+
+    if (cur_maxbucket != metap->hashm_maxbucket)
+    {
+        /* There's been a split, so process the additional bucket(s) */
+        cur_maxbucket = metap->hashm_maxbucket;
+        memcpy(&local_metapage, metap, sizeof(local_metapage));
+        _hash_relbuf(rel, metabuf, HASH_WRITE);
+        goto loop_top;
+    }
+
+    /* Okay, we're really done.  Update tuple count in metapage. */
+
+    if (orig_maxbucket == metap->hashm_maxbucket &&
+        orig_ntuples == metap->hashm_ntuples)
+    {
+        /*
+         * No one has split or inserted anything since start of scan,
+         * so believe our count as gospel.
+         */
+        metap->hashm_ntuples = tuples_remaining;
+    }
+    else
+    {
+        /*
+         * Otherwise, our count is untrustworthy since we may have
+         * double-scanned tuples in split buckets.  Proceed by
+         * dead-reckoning.
+         */
+        if (metap->hashm_ntuples > deleted_tuples)
+            metap->hashm_ntuples -= deleted_tuples;
         else
-            num_index_tuples += 1;
+            metap->hashm_ntuples = 0;
+        num_index_tuples = metap->hashm_ntuples;
     }
 
-    index_endscan(iscan);
+    _hash_wrtbuf(rel, metabuf);
 
     /* return statistics */
     num_pages = RelationGetNumberOfBlocks(rel);
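
The interesting detail in the new page-scan loop is how it deletes while iterating: PageIndexTupleDelete() compacts the page's item array, so the tuple that followed the deleted one slides down into the slot just examined. The loop therefore re-tests the same offset and decrements maxoffno instead of advancing. Below is a minimal standalone sketch of that pattern, using a plain int array in place of a hash page; is_dead() and delete_item() are hypothetical stand-ins for the bulk-delete callback and PageIndexTupleDelete(), not PostgreSQL APIs.

```c
#include <stdio.h>
#include <string.h>

/* Stand-in for the bulk-delete callback: report even values as dead. */
static int
is_dead(int item)
{
    return item % 2 == 0;
}

/* Stand-in for PageIndexTupleDelete(): remove items[off], closing the gap. */
static void
delete_item(int *items, int *nitems, int off)
{
    memmove(&items[off], &items[off + 1],
            (*nitems - off - 1) * sizeof(int));
    (*nitems)--;
}

int
main(void)
{
    int     items[] = {1, 2, 4, 5, 6, 7, 8};
    int     nitems = 7;
    int     off = 0;            /* analogue of FirstOffsetNumber */
    int     maxoff = nitems;    /* analogue of PageGetMaxOffsetNumber() */
    int     removed = 0;
    int     remaining = 0;

    while (off < maxoff)
    {
        if (is_dead(items[off]))
        {
            delete_item(items, &nitems, off);
            /* don't advance off: the next item just slid into this slot */
            maxoff--;
            removed++;
        }
        else
        {
            off++;
            remaining++;
        }
    }

    printf("removed %d, remaining %d:", removed, remaining);
    for (off = 0; off < nitems; off++)
        printf(" %d", items[off]);
    printf("\n");               /* prints: removed 4, remaining 3: 1 5 7 */
    return 0;
}
```

The same re-test-without-advancing rule is what lets the committed code visit each surviving tuple exactly once per page; the patch then leaves space compaction to _hash_squeezebucket() and handles concurrent splits with the metapage re-check (the goto loop_top retry), instead of driving deletions through a generic index scan as before.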
