From 9155580fd5fc2a0cbb23376dfca7cd21f59c2c7b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 3 Apr 2019 17:03:15 +0300 Subject: Generate less WAL during GiST, GIN and SP-GiST index build. Instead of WAL-logging every modification during the build separately, first build the index without any WAL-logging, and make a separate pass through the index at the end, to write all pages to the WAL. This significantly reduces the amount of WAL generated, and is usually also faster, despite the extra I/O needed for the extra scan through the index. WAL generated this way is also faster to replay. For GiST, the LSN-NSN interlock makes this a little tricky. All pages must be marked with a valid (i.e. non-zero) LSN, so that the parent-child LSN-NSN interlock works correctly. We now use magic value 1 for that during index build. Change the fake LSN counter to begin from 1000, so that 1 is safely smaller than any real or fake LSN. 2 would've been enough for our purposes, but let's reserve a bigger range, in case we need more special values in the future. Author: Anastasia Lubennikova, Andrey V. Lepikhov Reviewed-by: Heikki Linnakangas, Dmitry Dolgov --- src/backend/access/gist/gist.c | 76 +++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 30 deletions(-) (limited to 'src/backend/access/gist/gist.c') diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index f44c922b5d..2db790c840 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull, values, isnull, true /* size is currently bogus */ ); itup->t_tid = *ht_ctid; - gistdoinsert(r, itup, 0, giststate, heapRel); + gistdoinsert(r, itup, 0, giststate, heapRel, false); /* cleanup */ MemoryContextSwitchTo(oldCxt); @@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer leftchildbuf, List **splitinfo, bool markfollowright, - Relation heapRel) + Relation heapRel, + bool is_build) { BlockNumber blkno = BufferGetBlockNumber(buffer); Page page = BufferGetPage(buffer); @@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, * insertion for that. NB: The number of pages and data segments * specified here must match the calculations in gistXLogSplit()! */ - if (RelationNeedsWAL(rel)) + if (!is_build && RelationNeedsWAL(rel)) XLogEnsureRecordSpace(npage, 1 + npage * 2); START_CRIT_SECTION(); @@ -480,18 +481,30 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer)); dist->page = BufferGetPage(dist->buffer); - /* Write the WAL record */ - if (RelationNeedsWAL(rel)) - recptr = gistXLogSplit(is_leaf, - dist, oldrlink, oldnsn, leftchildbuf, - markfollowright); + /* + * Write the WAL record. + * + * If we're building a new index, however, we don't WAL-log changes + * yet. The LSN-NSN interlock between parent and child requires that + * LSNs never move backwards, so set the LSNs to a value that's + * smaller than any real or fake unlogged LSN that might be generated + * later. (There can't be any concurrent scans during index build, so + * we don't need to be able to detect concurrent splits yet.) + */ + if (is_build) + recptr = GistBuildLSN; else - recptr = gistGetFakeLSN(rel); + { + if (RelationNeedsWAL(rel)) + recptr = gistXLogSplit(is_leaf, + dist, oldrlink, oldnsn, leftchildbuf, + markfollowright); + else + recptr = gistGetFakeLSN(rel); + } for (ptr = dist; ptr; ptr = ptr->next) - { PageSetLSN(ptr->page, recptr); - } /* * Return the new child buffers to the caller. @@ -545,28 +558,29 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); - if (RelationNeedsWAL(rel)) + if (is_build) + recptr = GistBuildLSN; + else { - OffsetNumber ndeloffs = 0, - deloffs[1]; - - if (OffsetNumberIsValid(oldoffnum)) + if (RelationNeedsWAL(rel)) { - deloffs[0] = oldoffnum; - ndeloffs = 1; - } + OffsetNumber ndeloffs = 0, + deloffs[1]; - recptr = gistXLogUpdate(buffer, - deloffs, ndeloffs, itup, ntup, - leftchildbuf); + if (OffsetNumberIsValid(oldoffnum)) + { + deloffs[0] = oldoffnum; + ndeloffs = 1; + } - PageSetLSN(page, recptr); - } - else - { - recptr = gistGetFakeLSN(rel); - PageSetLSN(page, recptr); + recptr = gistXLogUpdate(buffer, + deloffs, ndeloffs, itup, ntup, + leftchildbuf); + } + else + recptr = gistGetFakeLSN(rel); } + PageSetLSN(page, recptr); if (newblkno) *newblkno = blkno; @@ -607,7 +621,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, */ void gistdoinsert(Relation r, IndexTuple itup, Size freespace, - GISTSTATE *giststate, Relation heapRel) + GISTSTATE *giststate, Relation heapRel, bool is_build) { ItemId iid; IndexTuple idxtuple; @@ -620,6 +634,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, state.freespace = freespace; state.r = r; state.heapRel = heapRel; + state.is_build = is_build; /* Start from the root */ firststack.blkno = GIST_ROOT_BLKNO; @@ -1252,7 +1267,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, leftchild, &splitinfo, true, - state->heapRel); + state->heapRel, + state->is_build); /* * Before recursing up in case the page was split, release locks on the -- cgit v1.2.1