author     Tom Lane <tgl@sss.pgh.pa.us>  2008-03-16 23:15:08 +0000
committer  Tom Lane <tgl@sss.pgh.pa.us>  2008-03-16 23:15:08 +0000
commit     787eba734be8e1fb8c5fdb101a02e826cccec3e9 (patch)
tree       511dc9dc0fc8e2f04c2e4862eb9e8348ee87396a  /src/backend/access/hash/hashpage.c
parent     ec6550c6c06ba3a0cf4df31d1016aa8eb8716ddb (diff)
When creating a large hash index, pre-sort the index entries by estimated
bucket number, so as to ensure locality of access to the index during the
insertion step.  Without this, building an index significantly larger than
available RAM takes a very long time because of thrashing.  On the other
hand, sorting is just useless overhead when the index does fit in RAM.  We
choose to sort when the initial index size exceeds effective_cache_size.

This is a revised version of work by Tom Raney and Shreya Bhargava.
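
The diff below changes _hash_metapinit to return the number of buckets it
created; per the commit message, the caller uses that value to decide
whether to pre-sort.  A minimal sketch of that decision follows (the caller
name hashbuild, the spool helper _h_spoolinit, the buildstate variable, and
the use of the bucket count as the size proxy are assumptions for
illustration, not shown in this diff):

    /*
     * Sketch of the caller-side decision.  _hash_metapinit and
     * effective_cache_size appear in the source; the other names here
     * (index, reltuples, buildstate, _h_spoolinit) are assumed.
     */
    uint32      num_buckets;

    /* _hash_metapinit now reports how many buckets it created */
    num_buckets = _hash_metapinit(index, reltuples);

    /*
     * Hash codes are essentially random, so inserting in heap-scan order
     * has no locality and thrashes once the index outgrows RAM.  Sort by
     * estimated bucket number only when the initial index size exceeds
     * effective_cache_size; otherwise sorting is useless overhead.
     */
    if (num_buckets >= (uint32) effective_cache_size)
        buildstate.spool = _h_spoolinit(index, num_buckets);  /* sorted build */
    else
        buildstate.spool = NULL;    /* insert in scan order */
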
Diffstat (limited to 'src/backend/access/hash/hashpage.c')
-rw-r--r--  src/backend/access/hash/hashpage.c  23
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index ec6f4b390f..d179af0121 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.73 2008/03/15 20:46:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.74 2008/03/16 23:15:08 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -315,13 +315,14 @@ _hash_chgbufaccess(Relation rel,
* the initial buckets, and the initial bitmap page.
*
* The initial number of buckets is dependent on num_tuples, an estimate
- * of the number of tuples to be loaded into the index initially.
+ * of the number of tuples to be loaded into the index initially. The
+ * chosen number of buckets is returned.
*
* We are fairly cavalier about locking here, since we know that no one else
* could be accessing this index. In particular the rule about not holding
* multiple buffer locks is ignored.
*/
-void
+uint32
_hash_metapinit(Relation rel, double num_tuples)
{
HashMetaPage metap;
@@ -438,10 +439,21 @@ _hash_metapinit(Relation rel, double num_tuples)
metap->hashm_firstfree = 0;
/*
+ * Release buffer lock on the metapage while we initialize buckets.
+ * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
+ * won't accomplish anything. It's a bad idea to hold buffer locks
+ * for long intervals in any case, since that can block the bgwriter.
+ */
+ _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
+
+ /*
* Initialize the first N buckets
*/
for (i = 0; i < num_buckets; i++)
{
+ /* Allow interrupts, in case N is huge */
+ CHECK_FOR_INTERRUPTS();
+
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
pg = BufferGetPage(buf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
@@ -453,6 +465,9 @@ _hash_metapinit(Relation rel, double num_tuples)
_hash_wrtbuf(rel, buf);
}
+ /* Now reacquire buffer lock on metapage */
+ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
+
/*
* Initialize first bitmap page
*/
@@ -460,6 +475,8 @@ _hash_metapinit(Relation rel, double num_tuples)
/* all done */
_hash_wrtbuf(rel, metabuf);
+
+ return num_buckets;
}
/*
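
To see concretely why the pre-sort pays off, here is a small standalone C
program (pure illustration: it is not PostgreSQL code, and every name and
parameter in it is invented) that replays the same stream of bucket numbers
through a tiny direct-mapped page cache, once in scan order and once
pre-sorted by bucket number:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define NKEYS    100000     /* index entries to insert */
    #define NBUCKETS   4096     /* bucket pages: the "index size" */
    #define CACHE       256     /* pages that fit in RAM */

    /* count page fetches through a trivial direct-mapped page cache */
    static long fetches(const unsigned *bucket, int n)
    {
        int     cache[CACHE];
        long    misses = 0;

        memset(cache, -1, sizeof cache);    /* cache starts empty */
        for (int i = 0; i < n; i++)
        {
            int slot = bucket[i] % CACHE;

            if (cache[slot] != (int) bucket[i])
            {
                cache[slot] = (int) bucket[i];
                misses++;
            }
        }
        return misses;
    }

    static int cmp(const void *a, const void *b)
    {
        unsigned ua = *(const unsigned *) a;
        unsigned ub = *(const unsigned *) b;

        return (ua > ub) - (ua < ub);
    }

    int main(void)
    {
        static unsigned bucket[NKEYS];

        /* hash codes are effectively random: scan order has no locality */
        for (int i = 0; i < NKEYS; i++)
            bucket[i] = (unsigned) rand() % NBUCKETS;

        printf("scan order:   %ld page fetches\n", fetches(bucket, NKEYS));

        /* pre-sort by (estimated) bucket number, as the commit does */
        qsort(bucket, NKEYS, sizeof bucket[0], cmp);
        printf("sorted order: %ld page fetches\n", fetches(bucket, NKEYS));
        return 0;
    }

With these parameters the sorted pass touches each bucket page roughly once
(about NBUCKETS fetches), while scan order misses the cache on nearly every
insertion; that gap is the thrashing the commit message describes.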