diff options
Diffstat (limited to 'src/backend/storage/buffer')
| -rw-r--r-- | src/backend/storage/buffer/README | 27 | ||||
| -rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 139 | ||||
| -rw-r--r-- | src/backend/storage/buffer/localbuf.c | 19 |
3 files changed, 70 insertions, 115 deletions
diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README index 41c9851532..9b8c6a745e 100644 --- a/src/backend/storage/buffer/README +++ b/src/backend/storage/buffer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.8 2005/03/04 20:21:06 tgl Exp $ +$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.9 2006/03/31 23:32:06 tgl Exp $ Notes about shared buffer access rules -------------------------------------- @@ -12,19 +12,18 @@ the relation. Relation-level locks are not discussed here.) Pins: one must "hold a pin on" a buffer (increment its reference count) before being allowed to do anything at all with it. An unpinned buffer is subject to being reclaimed and reused for a different page at any instant, -so touching it is unsafe. Typically a pin is acquired via ReadBuffer and -released via WriteBuffer (if one modified the page) or ReleaseBuffer (if not). -It is OK and indeed common for a single backend to pin a page more than -once concurrently; the buffer manager handles this efficiently. It is -considered OK to hold a pin for long intervals --- for example, sequential -scans hold a pin on the current page until done processing all the tuples -on the page, which could be quite a while if the scan is the outer scan of -a join. Similarly, btree index scans hold a pin on the current index page. -This is OK because normal operations never wait for a page's pin count to -drop to zero. (Anything that might need to do such a wait is instead -handled by waiting to obtain the relation-level lock, which is why you'd -better hold one first.) Pins may not be held across transaction -boundaries, however. +so touching it is unsafe. Normally a pin is acquired via ReadBuffer and +released via ReleaseBuffer. It is OK and indeed common for a single +backend to pin a page more than once concurrently; the buffer manager +handles this efficiently. It is considered OK to hold a pin for long +intervals --- for example, sequential scans hold a pin on the current page +until done processing all the tuples on the page, which could be quite a +while if the scan is the outer scan of a join. Similarly, btree index +scans hold a pin on the current index page. This is OK because normal +operations never wait for a page's pin count to drop to zero. (Anything +that might need to do such a wait is instead handled by waiting to obtain +the relation-level lock, which is why you'd better hold one first.) Pins +may not be held across transaction boundaries, however. Buffer content locks: there are two kinds of buffer lock, shared and exclusive, which act just as you'd expect: multiple backends can hold shared locks on diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 461c9cf1fa..38a5744105 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.205 2006/03/29 21:17:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.206 2006/03/31 23:32:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,13 +17,10 @@ * and pin it so that no one can destroy it while this process * is using it. * - * ReleaseBuffer() -- unpin the buffer + * ReleaseBuffer() -- unpin a buffer * - * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty" - * but don't unpin. The disk IO is delayed until buffer - * replacement. - * - * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() + * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty". + * The disk write is delayed until buffer replacement or checkpoint. * * BufferSync() -- flush all dirty buffers in the buffer pool. * @@ -101,7 +98,6 @@ static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void AtProcExit_Buffers(int code, Datum arg); -static void write_buffer(Buffer buffer, bool unpin); /* @@ -634,11 +630,16 @@ retry: } /* - * write_buffer -- common functionality for - * WriteBuffer and WriteNoReleaseBuffer + * MarkBufferDirty + * + * Marks buffer contents as dirty (actual write happens later). + * + * Buffer must be pinned and exclusive-locked. (If caller does not hold + * exclusive lock, then somebody could be in process of writing the buffer, + * leading to risk of bad data written to disk.) */ -static void -write_buffer(Buffer buffer, bool unpin) +void +MarkBufferDirty(Buffer buffer) { volatile BufferDesc *bufHdr; @@ -647,13 +648,15 @@ write_buffer(Buffer buffer, bool unpin) if (BufferIsLocal(buffer)) { - WriteLocalBuffer(buffer, unpin); + MarkLocalBufferDirty(buffer); return; } bufHdr = &BufferDescriptors[buffer - 1]; Assert(PrivateRefCount[buffer - 1] > 0); + /* unfortunately we can't check if the lock is held exclusively */ + Assert(LWLockHeldByMe(bufHdr->content_lock)); LockBufHdr(bufHdr); @@ -668,35 +671,6 @@ write_buffer(Buffer buffer, bool unpin) bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnlockBufHdr(bufHdr); - - if (unpin) - UnpinBuffer(bufHdr, true, true); -} - -/* - * WriteBuffer - * - * Marks buffer contents as dirty (actual write happens later). - * - * Assume that buffer is pinned. Assume that reln is valid. - * - * Side Effects: - * Pin count is decremented. - */ -void -WriteBuffer(Buffer buffer) -{ - write_buffer(buffer, true); -} - -/* - * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer - * when the operation is complete. - */ -void -WriteNoReleaseBuffer(Buffer buffer) -{ - write_buffer(buffer, false); } /* @@ -1617,8 +1591,7 @@ FlushRelationBuffers(Relation rel) } /* - * ReleaseBuffer -- remove the pin on a buffer without - * marking it dirty. + * ReleaseBuffer -- release the pin on a buffer */ void ReleaseBuffer(Buffer buffer) @@ -1652,6 +1625,18 @@ ReleaseBuffer(Buffer buffer) } /* + * UnlockReleaseBuffer -- release the content lock and pin on a buffer + * + * This is just a shorthand for a common combination. + */ +void +UnlockReleaseBuffer(Buffer buffer) +{ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); +} + +/* * IncrBufferRefCount * Increment the pin count on a buffer that we have *already* pinned * at least once. @@ -1676,20 +1661,13 @@ IncrBufferRefCount(Buffer buffer) * * Mark a buffer dirty when we have updated tuple commit-status bits in it. * - * This is essentially the same as WriteNoReleaseBuffer. We preserve the - * distinction as a way of documenting that the caller has not made a critical - * data change --- the status-bit update could be redone by someone else just - * as easily. Therefore, no WAL log record need be generated, whereas calls - * to WriteNoReleaseBuffer really ought to be associated with a WAL-entry- - * creating action. - * - * This routine might get called many times on the same page, if we are making - * the first scan after commit of an xact that added/deleted many tuples. - * So, be as quick as we can if the buffer is already dirty. We do this by - * not acquiring spinlock if it looks like the status bits are already OK. - * (Note it is okay if someone else clears BM_JUST_DIRTIED immediately after - * we look, because the buffer content update is already done and will be - * reflected in the I/O.) + * This is essentially the same as MarkBufferDirty, except that the caller + * might have only share-lock instead of exclusive-lock on the buffer's + * content lock. We preserve the distinction mainly as a way of documenting + * that the caller has not made a critical data change --- the status-bit + * update could be redone by someone else just as easily. Therefore, no WAL + * log record need be generated, whereas calls to MarkBufferDirty really ought + * to be associated with a WAL-entry-creating action. */ void SetBufferCommitInfoNeedsSave(Buffer buffer) @@ -1701,19 +1679,32 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) if (BufferIsLocal(buffer)) { - WriteLocalBuffer(buffer, false); + MarkLocalBufferDirty(buffer); return; } bufHdr = &BufferDescriptors[buffer - 1]; Assert(PrivateRefCount[buffer - 1] > 0); + /* here, either share or exclusive lock is OK */ + Assert(LWLockHeldByMe(bufHdr->content_lock)); + /* + * This routine might get called many times on the same page, if we are + * making the first scan after commit of an xact that added/deleted many + * tuples. So, be as quick as we can if the buffer is already dirty. We + * do this by not acquiring spinlock if it looks like the status bits are + * already OK. (Note it is okay if someone else clears BM_JUST_DIRTIED + * immediately after we look, because the buffer content update is already + * done and will be reflected in the I/O.) + */ if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != (BM_DIRTY | BM_JUST_DIRTIED)) { LockBufHdr(bufHdr); Assert(bufHdr->refcount > 0); + if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive) + VacuumCostBalance += VacuumCostPageDirty; bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnlockBufHdr(bufHdr); } @@ -1767,7 +1758,7 @@ LockBuffer(Buffer buffer, int mode) Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) - return; + return; /* local buffers need no lock */ buf = &(BufferDescriptors[buffer - 1]); @@ -1776,19 +1767,7 @@ LockBuffer(Buffer buffer, int mode) else if (mode == BUFFER_LOCK_SHARE) LWLockAcquire(buf->content_lock, LW_SHARED); else if (mode == BUFFER_LOCK_EXCLUSIVE) - { LWLockAcquire(buf->content_lock, LW_EXCLUSIVE); - - /* - * This is not the best place to mark buffer dirty (eg indices do not - * always change buffer they lock in excl mode). But please remember - * that it's critical to set dirty bit *before* logging changes with - * XLogInsert() - see comments in SyncOneBuffer(). - */ - LockBufHdr(buf); - buf->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnlockBufHdr(buf); - } else elog(ERROR, "unrecognized buffer lock mode: %d", mode); } @@ -1809,21 +1788,7 @@ ConditionalLockBuffer(Buffer buffer) buf = &(BufferDescriptors[buffer - 1]); - if (LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE)) - { - /* - * This is not the best place to mark buffer dirty (eg indices do not - * always change buffer they lock in excl mode). But please remember - * that it's critical to set dirty bit *before* logging changes with - * XLogInsert() - see comments in SyncOneBuffer(). - */ - LockBufHdr(buf); - buf->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnlockBufHdr(buf); - - return true; - } - return false; + return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); } /* diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 5a86a7c762..31d5c27e79 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.73 2006/03/05 15:58:36 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.74 2006/03/31 23:32:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -209,11 +209,11 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) } /* - * WriteLocalBuffer - - * writes out a local buffer (actually, just marks it dirty) + * MarkLocalBufferDirty - + * mark a local buffer dirty */ void -WriteLocalBuffer(Buffer buffer, bool release) +MarkLocalBufferDirty(Buffer buffer) { int bufid; BufferDesc *bufHdr; @@ -221,7 +221,7 @@ WriteLocalBuffer(Buffer buffer, bool release) Assert(BufferIsLocal(buffer)); #ifdef LBDEBUG - fprintf(stderr, "LB WRITE %d\n", buffer); + fprintf(stderr, "LB DIRTY %d\n", buffer); #endif bufid = -(buffer + 1); @@ -230,15 +230,6 @@ WriteLocalBuffer(Buffer buffer, bool release) bufHdr = &LocalBufferDescriptors[bufid]; bufHdr->flags |= BM_DIRTY; - - if (release) - { - LocalRefCount[bufid]--; - if (LocalRefCount[bufid] == 0 && - bufHdr->usage_count < BM_MAX_USAGE_COUNT) - bufHdr->usage_count++; - ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer); - } } /* |
