summary | refs | log | tree | commit | diff
path: root/src/backend/access/gist/gistxlog.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2016-09-09 18:02:24 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2016-09-09 18:02:36 -0400
commit: b1328d78f88cdf4f7504004159e530b776f0de16 (patch)
tree: 8894040f11d714d00e3e4826cf9ec8797d8cdae0 /src/backend/access/gist/gistxlog.c
parent: 5c609a742f294907512b946dbaf1feaa3b71ddc7 (diff)
download: postgresql-b1328d78f88cdf4f7504004159e530b776f0de16.tar.gz
Invent PageIndexTupleOverwrite, and teach BRIN and GiST to use it.

PageIndexTupleOverwrite performs approximately the same function as PageIndexTupleDelete (or PageIndexDeleteNoCompact) followed by PageAddItem targeting the same item pointer offset. But in the case where the new tuple is the same size as the old, it avoids shuffling other data around on the page, because the new tuple is placed where the old one was rather than being appended to the end of the page. This has been shown to provide a substantial speedup for some GiST use-cases.

Also, this change allows some API simplifications: we can get rid of the rather klugy and error-prone PAI_ALLOW_FAR_OFFSET flag for PageAddItemExtended, since that was used only to cover a corner case for BRIN that's better expressed by using PageIndexTupleOverwrite.

Note that this patch causes a rather subtle WAL incompatibility: the physical page content change represented by certain WAL records is now different than it was before, because while the tuples have the same itempointer line numbers, the tuples themselves are in different places. I have not bumped the WAL version number because I think it doesn't matter unless you are trying to do bitwise comparisons of original and replayed pages, and in any case we're early in a devel cycle and there will probably be more WAL changes before v10 gets out the door.

There is probably room to make use of PageIndexTupleOverwrite in SP-GiST and GIN too, but that is left for a future patch.

Andrey Borodin, reviewed by Anastasia Lubennikova, whacked around a bit by me

Discussion: <CAJEAwVGQjGGOj6mMSgMwGvtFd5Kwe6VFAxY=uEPZWMDjzbn4VQ@mail.gmail.com>

Diffstat (limited to 'src/backend/access/gist/gistxlog.c')
-rw-r--r-- src/backend/access/gist/gistxlog.c | 29
1 files changed, 26 insertions, 3 deletions
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 01c7ef7ea6..5853d7638e 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -80,9 +80,31 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
page = (Page) BufferGetPage(buffer);
- /* Delete old tuples */
- if (xldata->ntodelete > 0)
+ if (xldata->ntodelete == 1 && xldata->ntoinsert == 1)
{
+ /*
+ * When replacing one tuple with one other tuple, we must use
+ * PageIndexTupleOverwrite for consistency with gistplacetopage.
+ */
+ OffsetNumber offnum = *((OffsetNumber *) data);
+ IndexTuple itup;
+ Size itupsize;
+
+ data += sizeof(OffsetNumber);
+ itup = (IndexTuple) data;
+ itupsize = IndexTupleSize(itup);
+ if (!PageIndexTupleOverwrite(page, offnum, (Item) itup, itupsize))
+ elog(ERROR, "failed to add item to GiST index page, size %d bytes",
+ (int) itupsize);
+ data += itupsize;
+ /* should be nothing left after consuming 1 tuple */
+ Assert(data - begin == datalen);
+ /* update insertion count for assert check below */
+ ninserted++;
+ }
+ else if (xldata->ntodelete > 0)
+ {
+ /* Otherwise, delete old tuples if any */
OffsetNumber *todelete = (OffsetNumber *) data;
data += sizeof(OffsetNumber) * xldata->ntodelete;
@@ -92,7 +114,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
GistMarkTuplesDeleted(page);
}
- /* add tuples */
+ /* Add new tuples if any */
if (data - begin < datalen)
{
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
@@ -115,6 +137,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
}
}
+ /* Check that XLOG record contained expected number of tuples */
Assert(ninserted == xldata->ntoinsert);
PageSetLSN(page, lsn);