diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-03-16 23:15:08 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-03-16 23:15:08 +0000 |
| commit | 787eba734be8e1fb8c5fdb101a02e826cccec3e9 (patch) | |
| tree | 511dc9dc0fc8e2f04c2e4862eb9e8348ee87396a /src/backend/access/hash/hashpage.c | |
| parent | ec6550c6c06ba3a0cf4df31d1016aa8eb8716ddb (diff) | |
| download | postgresql-787eba734be8e1fb8c5fdb101a02e826cccec3e9.tar.gz | |
When creating a large hash index, pre-sort the index entries by estimated
bucket number, so as to ensure locality of access to the index during the
insertion step. Without this, building an index significantly larger than
available RAM takes a very long time because of thrashing. On the other
hand, sorting is just useless overhead when the index does fit in RAM.
We choose to sort when the initial index size exceeds effective_cache_size.
This is a revised version of work by Tom Raney and Shreya Bhargava.
Diffstat (limited to 'src/backend/access/hash/hashpage.c')
| -rw-r--r-- | src/backend/access/hash/hashpage.c | 23 |
1 file changed, 20 insertions, 3 deletions
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index ec6f4b390f..d179af0121 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.73 2008/03/15 20:46:31 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.74 2008/03/16 23:15:08 tgl Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -315,13 +315,14 @@ _hash_chgbufaccess(Relation rel, * the initial buckets, and the initial bitmap page. * * The initial number of buckets is dependent on num_tuples, an estimate - * of the number of tuples to be loaded into the index initially. + * of the number of tuples to be loaded into the index initially. The + * chosen number of buckets is returned. * * We are fairly cavalier about locking here, since we know that no one else * could be accessing this index. In particular the rule about not holding * multiple buffer locks is ignored. */ -void +uint32 _hash_metapinit(Relation rel, double num_tuples) { HashMetaPage metap; @@ -438,10 +439,21 @@ _hash_metapinit(Relation rel, double num_tuples) metap->hashm_firstfree = 0; /* + * Release buffer lock on the metapage while we initialize buckets. + * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS + * won't accomplish anything. It's a bad idea to hold buffer locks + * for long intervals in any case, since that can block the bgwriter. 
+ */ + _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK); + + /* * Initialize the first N buckets */ for (i = 0; i < num_buckets; i++) { + /* Allow interrupts, in case N is huge */ + CHECK_FOR_INTERRUPTS(); + buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i)); pg = BufferGetPage(buf); pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); @@ -453,6 +465,9 @@ _hash_metapinit(Relation rel, double num_tuples) _hash_wrtbuf(rel, buf); } + /* Now reacquire buffer lock on metapage */ + _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE); + /* * Initialize first bitmap page */ @@ -460,6 +475,8 @@ _hash_metapinit(Relation rel, double num_tuples) /* all done */ _hash_wrtbuf(rel, metabuf); + + return num_buckets; } /* |
