What looks like some *major* improvements to btree indexing...

Patches from: aoki@CS.Berkeley.EDU (Paul M. Aoki)

I gave Jolly my btree bulkload code a long, long time ago but never gave him a bunch of my bugfixes. Here's a diff against the 6.0 baseline.

For some reason, this code has slowed down somewhat relative to the insertion-build code on very small tables. Don't know why -- it used to be within about 10%. Anyway, here are some (highly unscientific!) timings on a DEC 3000/300 for synthetic tables with 10k, 100k and 1000k tuples (basically, 1MB, 10MB and 100MB heaps). 'c' means clustered (pre-sorted) inputs and 'u' means unclustered (randomly ordered) inputs. The 10k table basically fits in the buffer pool, but the 100k and 1000k tables don't.

As you can see, insertion build is fine if you've sorted your heaps on your index key or if your heap fits in core, but is absolutely horrible on unordered data (yes, that's 7.5 hours to index 100MB of data...) because of the zillions of random I/Os.

If it doesn't work for you for whatever reason, you can always turn it back off by flipping the FastBuild flag in nbtree.c. I don't have time to maintain it. Good luck!
baseline code:

time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 8.6
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 9.1
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.2
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 652.4
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.1
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 26772.9

bulkloading code:

time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 11.3
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 10.4
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.5
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 63.5
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.9
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 701.0
author: Marc G. Fournier <scrappy@hub.org> 1997-02-12 05:04:52 +0000
committer: Marc G. Fournier <scrappy@hub.org> 1997-02-12 05:04:52 +0000
commit: 5d9f146c6423dcc9b48b87407e21a82c6ab9419b (patch)
tree: 23aa020eb903bee30a93f7fb180c6bb14edcd796 /src/backend/access/nbtree/nbtree.c
parent: d5a3f52d6267f52ec6e97134e4f1fd2b1b0fdcbb (diff)
download: postgresql-5d9f146c6423dcc9b48b87407e21a82c6ab9419b.tar.gz
1 files changed, 51 insertions, 22 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 2ecce30863..0624bd06a8 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.12 1997/01/10 09:46:33 vadim Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.13 1997/02/12 05:04:17 scrappy Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -33,8 +33,8 @@
 # include <string.h>
 #endif
 
-bool	BuildingBtree = false;
-bool	FastBuild = false; /* turn this on to make bulk builds work*/
+bool	BuildingBtree = false;	/* see comment in btbuild() */
+bool	FastBuild = true;	/* use sort/build instead of insertion build */
 
 /*
  *  btbuild() -- build a new btree index.
@@ -67,21 +67,34 @@ btbuild(Relation heap,
     int i;
     BTItem btitem;
 #ifndef OMIT_PARTIAL_INDEX
-    ExprContext *econtext;
-    TupleTable tupleTable;
-    TupleTableSlot *slot;
+    ExprContext *econtext = (ExprContext *) NULL;
+    TupleTable tupleTable = (TupleTable) NULL;
+    TupleTableSlot *slot = (TupleTableSlot *) NULL;
 #endif
     Oid hrelid, irelid;
     Node *pred, *oldPred;
-    void *spool;
+    void *spool = (void *) NULL;
     bool isunique;
-    
+    bool usefast;
+
+#if 0
+    ResetBufferUsage();
+#endif
+
     /* note that this is a new btree */
     BuildingBtree = true;
     
     pred = predInfo->pred;
     oldPred = predInfo->oldPred;
 
+    /*
+     * bootstrap processing does something strange, so don't use
+     * sort/build for initial catalog indices.  at some point i need
+     * to look harder at this.  (there is some kind of incremental
+     * processing going on there.) -- pma 08/29/95
+     */
+    usefast = (FastBuild && IsNormalProcessingMode());
+
     /* see if index is unique */
     isunique = IndexIsUniqueNoCache(RelationGetRelationId(index));
 
@@ -110,13 +123,16 @@ btbuild(Relation heap,
 	slot = ExecAllocTableSlot(tupleTable);
 	econtext = makeNode(ExprContext);
 	FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
+
+	/*
+	 * we never want to use sort/build if we are extending an
+	 * existing partial index -- it works by inserting the
+	 * newly-qualifying tuples into the existing index.
+	 * (sort/build would overwrite the existing index with one
+	 * consisting of the newly-qualifying tuples.)
+	 */
+	usefast = false;
     }
-	else
-	{
-		econtext = NULL;
-		tupleTable = NULL;
-		slot = NULL;
-	}
 #endif /* OMIT_PARTIAL_INDEX */
     
     /* start a heap scan */
@@ -126,12 +142,10 @@ btbuild(Relation heap,
     /* build the index */
     nhtups = nitups = 0;
     
-    if (FastBuild) {
+    if (usefast) {
 	spool = _bt_spoolinit(index, 7);
 	res = (InsertIndexResult) NULL;
     }
-	else
-		spool = NULL;
 
     for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
 	
@@ -219,7 +233,7 @@ btbuild(Relation heap,
 	 * into a spool page for subsequent processing.  otherwise, we
 	 * insert into the btree.
 	 */
-	if (FastBuild) {
+	if (usefast) {
 	    _bt_spool(index, btitem, spool);
 	} else {
 	    res = _bt_doinsert(index, btitem, isunique, heap);
@@ -248,12 +262,24 @@ btbuild(Relation heap,
      * merging the runs, (2) inserting the sorted tuples into btree
      * pages and (3) building the upper levels.
      */
-    if (FastBuild) {
-	_bt_spool(index, (BTItem) NULL, spool);	/* flush spool */
+    if (usefast) {
+	_bt_spool(index, (BTItem) NULL, spool);	/* flush the spool */
 	_bt_leafbuild(index, spool);
 	_bt_spooldestroy(spool);
     }
 
+#if 0
+    {
+	extern int ReadBufferCount, BufferHitCount, BufferFlushCount;
+	extern long NDirectFileRead, NDirectFileWrite;
+
+	printf("buffer(%d): r=%d w=%d\n", heap->rd_rel->relblocksz,
+	       ReadBufferCount - BufferHitCount, BufferFlushCount);
+	printf("direct(%d): r=%d w=%d\n", LocalBlockSize,
+	       NDirectFileRead, NDirectFileWrite);
+    }
+#endif
+
     /*
      *  Since we just counted the tuples in the heap, we update its
      *  stats in pg_class to guarantee that the planner takes advantage
@@ -312,7 +338,10 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation
 
     pfree(btitem);
     pfree(itup);
-    
+
+    /* adjust any active scans that will be affected by this insertion */
+    _bt_adjscans(rel, &(res->pointerData), BT_INSERT);
+
     return (res);
 }
 
@@ -533,7 +562,7 @@ void
 btdelete(Relation rel, ItemPointer tid)
 {
     /* adjust any active scans that will be affected by this deletion */
-    _bt_adjscans(rel, tid);
+    _bt_adjscans(rel, tid, BT_DELETE);
     
     /* delete the data from the page */
     _bt_pagedel(rel, tid);
author	Marc G. Fournier <scrappy@hub.org>	1997-02-12 05:04:52 +0000
committer	Marc G. Fournier <scrappy@hub.org>	1997-02-12 05:04:52 +0000
commit	5d9f146c6423dcc9b48b87407e21a82c6ab9419b (patch)
tree	23aa020eb903bee30a93f7fb180c6bb14edcd796 /src/backend/access/nbtree/nbtree.c
parent	d5a3f52d6267f52ec6e97134e4f1fd2b1b0fdcbb (diff)
download	postgresql-5d9f146c6423dcc9b48b87407e21a82c6ab9419b.tar.gz