summaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/freelist.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2004-04-19 23:27:17 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2004-04-19 23:27:17 +0000
commit011c3e62e78b02a573f6f59ce78b1e7515d380c0 (patch)
tree19c3c8ab77264aa49421cd432025f0fa25edbf60 /src/backend/storage/buffer/freelist.c
parent8f73bbae854b71bea6a544941886f072829beb72 (diff)
downloadpostgresql-011c3e62e78b02a573f6f59ce78b1e7515d380c0.tar.gz
Code review for ARC patch. Eliminate static variables, improve handling
of VACUUM cases so that VACUUM requests don't affect the ARC state at all, avoid a corner case where BufferSync would uselessly rewrite a buffer that no longer contains the page that was to be flushed. Make some other minor cleanups in and around the bufmgr as well, such as moving PinBuffer and UnpinBuffer into bufmgr.c where they really belong.
Diffstat (limited to 'src/backend/storage/buffer/freelist.c')
-rw-r--r--src/backend/storage/buffer/freelist.c710
1 files changed, 325 insertions, 385 deletions
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 595e4905a8..c14d446497 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -3,210 +3,208 @@
* freelist.c
* routines for manipulating the buffer pool's replacement strategy.
*
+ * Note: all routines in this file assume that the BufMgrLock is held
+ * by the caller, so no synchronization is needed.
+ *
+ *
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.41 2004/02/12 15:06:56 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.42 2004/04/19 23:27:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-/*
- * OLD COMMENTS
- *
- * Data Structures:
- * SharedFreeList is a circular queue. Notice that this
- * is a shared memory queue so the next/prev "ptrs" are
- * buffer ids, not addresses.
- *
- * Sync: all routines in this file assume that the buffer
- * semaphore has been acquired by the caller.
- */
-
#include "postgres.h"
+#include "access/xact.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
-#include "storage/ipc.h"
-#include "storage/proc.h"
-#include "access/xact.h"
-#include "miscadmin.h"
-#ifndef MAX
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#endif
-#ifndef MIN
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-#endif
+/* GUC variable: time in seconds between statistics reports */
+int DebugSharedBuffers = 0;
+
+/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;
static BufferStrategyCDB *StrategyCDB = NULL;
-static int strategy_cdb_found;
-static int strategy_cdb_replace;
-static int strategy_get_from;
-
-int DebugSharedBuffers = 0;
-
-static bool strategy_hint_vacuum;
+/* Backend-local state about whether currently vacuuming */
+static bool strategy_hint_vacuum = false;
static TransactionId strategy_vacuum_xid;
-#define T1_TARGET StrategyControl->target_T1_size
-#define B1_LENGTH StrategyControl->listSize[STRAT_LIST_B1]
-#define T1_LENGTH StrategyControl->listSize[STRAT_LIST_T1]
-#define T2_LENGTH StrategyControl->listSize[STRAT_LIST_T2]
-#define B2_LENGTH StrategyControl->listSize[STRAT_LIST_B2]
+#define T1_TARGET (StrategyControl->target_T1_size)
+#define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1])
+#define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1])
+#define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2])
+#define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2])
/*
* Macro to remove a CDB from whichever list it currently is on
*/
#define STRAT_LIST_REMOVE(cdb) \
-{ \
- AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \
- if ((cdb)->prev < 0) \
- StrategyControl->listHead[(cdb)->list] = (cdb)->next; \
- else \
- StrategyCDB[(cdb)->prev].next = (cdb)->next; \
- if ((cdb)->next < 0) \
- StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \
- else \
- StrategyCDB[(cdb)->next].prev = (cdb)->prev; \
- StrategyControl->listSize[(cdb)->list]--; \
- (cdb)->list = STRAT_LIST_UNUSED; \
-}
+do { \
+ Assert((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \
+ if ((cdb)->prev < 0) \
+ StrategyControl->listHead[(cdb)->list] = (cdb)->next; \
+ else \
+ StrategyCDB[(cdb)->prev].next = (cdb)->next; \
+ if ((cdb)->next < 0) \
+ StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \
+ else \
+ StrategyCDB[(cdb)->next].prev = (cdb)->prev; \
+ StrategyControl->listSize[(cdb)->list]--; \
+ (cdb)->list = STRAT_LIST_UNUSED; \
+} while(0)
/*
* Macro to add a CDB to the tail of a list (MRU position)
*/
#define STRAT_MRU_INSERT(cdb,l) \
-{ \
- AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \
- if (StrategyControl->listTail[(l)] < 0) \
- { \
- (cdb)->prev = (cdb)->next = -1; \
- StrategyControl->listHead[(l)] = \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- else \
- { \
- (cdb)->next = -1; \
- (cdb)->prev = StrategyControl->listTail[(l)]; \
- StrategyCDB[StrategyControl->listTail[(l)]].next = \
- ((cdb) - StrategyCDB); \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- StrategyControl->listSize[(l)]++; \
- (cdb)->list = (l); \
-}
+do { \
+ Assert((cdb)->list == STRAT_LIST_UNUSED); \
+ if (StrategyControl->listTail[(l)] < 0) \
+ { \
+ (cdb)->prev = (cdb)->next = -1; \
+ StrategyControl->listHead[(l)] = \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ else \
+ { \
+ (cdb)->next = -1; \
+ (cdb)->prev = StrategyControl->listTail[(l)]; \
+ StrategyCDB[StrategyControl->listTail[(l)]].next = \
+ ((cdb) - StrategyCDB); \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ StrategyControl->listSize[(l)]++; \
+ (cdb)->list = (l); \
+} while(0)
/*
* Macro to add a CDB to the head of a list (LRU position)
*/
#define STRAT_LRU_INSERT(cdb,l) \
-{ \
- AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \
- if (StrategyControl->listHead[(l)] < 0) \
- { \
- (cdb)->prev = (cdb)->next = -1; \
- StrategyControl->listHead[(l)] = \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- else \
- { \
- (cdb)->prev = -1; \
- (cdb)->next = StrategyControl->listHead[(l)]; \
- StrategyCDB[StrategyControl->listHead[(l)]].prev = \
- ((cdb) - StrategyCDB); \
- StrategyControl->listHead[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- StrategyControl->listSize[(l)]++; \
- (cdb)->list = (l); \
-}
+do { \
+ Assert((cdb)->list == STRAT_LIST_UNUSED); \
+ if (StrategyControl->listHead[(l)] < 0) \
+ { \
+ (cdb)->prev = (cdb)->next = -1; \
+ StrategyControl->listHead[(l)] = \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ else \
+ { \
+ (cdb)->prev = -1; \
+ (cdb)->next = StrategyControl->listHead[(l)]; \
+ StrategyCDB[StrategyControl->listHead[(l)]].prev = \
+ ((cdb) - StrategyCDB); \
+ StrategyControl->listHead[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ StrategyControl->listSize[(l)]++; \
+ (cdb)->list = (l); \
+} while(0)
+
+
+/*
+ * Printout for use when DebugSharedBuffers is enabled
+ *
+ * If more than DebugSharedBuffers seconds have passed since the last
+ * report (StrategyControl->stat_report), emit three DEBUG1 log lines:
+ * the T1 target and the B1/T1/T2/B2 list lengths, the per-list hit
+ * percentages since the last report, and the number of consecutive
+ * non-dirty buffers found at the head (LRU end) of T1 and of T2.
+ * The lookup and hit counters are then reset and the report time is
+ * updated. If the interval has not yet elapsed, nothing is done.
+ */
+static void
+StrategyStatsDump(void)
+{
+ time_t now = time(NULL);
+
+ if (StrategyControl->stat_report + DebugSharedBuffers < now) /* report interval elapsed? */
+ {
+ long all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
+ int id, t1_clean, t2_clean;
+ ErrorContextCallback *errcxtold;
+ id = StrategyControl->listHead[STRAT_LIST_T1]; /* walk T1 starting from its head */
+ t1_clean = 0;
+ while (id >= 0)
+ {
+ if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
+ break; /* stop counting at the first dirty buffer */
+ t1_clean++;
+ id = StrategyCDB[id].next;
+ }
+ id = StrategyControl->listHead[STRAT_LIST_T2]; /* same walk for T2 */
+ t2_clean = 0;
+ while (id >= 0)
+ {
+ if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
+ break; /* stop counting at the first dirty buffer */
+ t2_clean++;
+ id = StrategyCDB[id].next;
+ }
+
+ if (StrategyControl->num_lookup == 0) /* no lookups recorded: avoid dividing by zero */
+ {
+ all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
+ }
+ else
+ {
+ b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
+ StrategyControl->num_lookup);
+ t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
+ StrategyControl->num_lookup);
+ t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
+ StrategyControl->num_lookup);
+ b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
+ StrategyControl->num_lookup);
+ all_hit = b1_hit + t1_hit + t2_hit + b2_hit; /* sum of the four per-list percentages */
+ }
+
+ errcxtold = error_context_stack; /* saved so it can be restored after logging */
+ error_context_stack = NULL; /* NOTE(review): presumably keeps context callbacks off these messages - confirm */
+ elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
+ T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
+ elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
+ all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
+ elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d",
+ t1_clean, t2_clean);
+ error_context_stack = errcxtold;
+
+ StrategyControl->num_lookup = 0; /* counters restart for the next reporting interval */
+ StrategyControl->num_hit[STRAT_LIST_B1] = 0;
+ StrategyControl->num_hit[STRAT_LIST_T1] = 0;
+ StrategyControl->num_hit[STRAT_LIST_T2] = 0;
+ StrategyControl->num_hit[STRAT_LIST_B2] = 0;
+ StrategyControl->stat_report = now; /* remember when this report was made */
+ }
+}
/*
* StrategyBufferLookup
*
* Lookup a page request in the cache directory. A buffer is only
- * returned for a T1 or T2 cache hit. B1 and B2 hits are only
- * remembered here to later affect the behaviour.
+ * returned for a T1 or T2 cache hit. B1 and B2 hits are just
+ * remembered here, to possibly affect the behaviour later.
+ *
+ * recheck indicates we are rechecking after I/O wait; do not change
+ * internal status in this case.
+ *
+ * *cdb_found_index is set to the index of the found CDB, or -1 if none.
+ * This is not intended to be used by the caller, except to pass to
+ * StrategyReplaceBuffer().
*/
BufferDesc *
-StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
+StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
+ int *cdb_found_index)
{
BufferStrategyCDB *cdb;
- time_t now;
+ /* Optional stats printout */
if (DebugSharedBuffers > 0)
- {
- time(&now);
- if (StrategyControl->stat_report + DebugSharedBuffers < now)
- {
- long all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
- int id, t1_clean, t2_clean;
- ErrorContextCallback *errcxtold;
-
- id = StrategyControl->listHead[STRAT_LIST_T1];
- t1_clean = 0;
- while (id >= 0)
- {
- if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
- break;
- t1_clean++;
- id = StrategyCDB[id].next;
- }
- id = StrategyControl->listHead[STRAT_LIST_T2];
- t2_clean = 0;
- while (id >= 0)
- {
- if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
- break;
- t2_clean++;
- id = StrategyCDB[id].next;
- }
-
- if (StrategyControl->num_lookup == 0)
- {
- all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
- }
- else
- {
- b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
- StrategyControl->num_lookup);
- t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
- StrategyControl->num_lookup);
- t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
- StrategyControl->num_lookup);
- b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
- StrategyControl->num_lookup);
- all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
- }
-
- errcxtold = error_context_stack;
- error_context_stack = NULL;
- elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
- T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
- elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
- all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
- elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d",
- t1_clean, t2_clean);
- error_context_stack = errcxtold;
-
- StrategyControl->num_lookup = 0;
- StrategyControl->num_hit[STRAT_LIST_B1] = 0;
- StrategyControl->num_hit[STRAT_LIST_T1] = 0;
- StrategyControl->num_hit[STRAT_LIST_T2] = 0;
- StrategyControl->num_hit[STRAT_LIST_B2] = 0;
- StrategyControl->stat_report = now;
- }
- }
+ StrategyStatsDump();
/*
* Count lookups
@@ -216,75 +214,75 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
/*
* Lookup the block in the shared hash table
*/
- strategy_cdb_found = BufTableLookup(tagPtr);
+ *cdb_found_index = BufTableLookup(tagPtr);
/*
- * Handle CDB lookup miss
+ * Done if complete CDB lookup miss
*/
- if (strategy_cdb_found < 0)
- {
- if (!recheck)
- {
- /*
- * This is an initial lookup and we have a complete
- * cache miss (block found nowhere). This means we
- * remember according to the current T1 size and the
- * target T1 size from where we take a block if we
- * need one later.
- */
- if (T1_LENGTH >= MAX(1, T1_TARGET))
- strategy_get_from = STRAT_LIST_T1;
- else
- strategy_get_from = STRAT_LIST_T2;
- }
-
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageMiss;
-
- /* report cache miss */
+ if (*cdb_found_index < 0)
return NULL;
- }
/*
* We found a CDB
*/
- cdb = &StrategyCDB[strategy_cdb_found];
+ cdb = &StrategyCDB[*cdb_found_index];
/*
* Count hits
*/
StrategyControl->num_hit[cdb->list]++;
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
/*
* If this is a T2 hit, we simply move the CDB to the
* T2 MRU position and return the found buffer.
+ *
+ * A CDB in T2 cannot have t1_vacuum set, so we needn't check. However,
+ * if the current process is VACUUM then it doesn't promote to MRU.
*/
if (cdb->list == STRAT_LIST_T2)
{
- STRAT_LIST_REMOVE(cdb);
- STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ if (!strategy_hint_vacuum)
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ }
return &BufferDescriptors[cdb->buf_id];
}
/*
- * If this is a T1 hit, we move the buffer to the T2 MRU
- * only if another transaction had read it into T1. This is
- * required because any UPDATE or DELETE in PostgreSQL does
- * multiple ReadBuffer(), first during the scan, later during
- * the heap_update() or heap_delete().
+ * If this is a T1 hit, we move the buffer to the T2 MRU only if another
+ * transaction had read it into T1, *and* neither transaction is a VACUUM.
+ * This is required because any UPDATE or DELETE in PostgreSQL does
+ * multiple ReadBuffer(), first during the scan, later during the
+ * heap_update() or heap_delete(). Otherwise move to T1 MRU. VACUUM
+ * doesn't even get to make that happen.
*/
if (cdb->list == STRAT_LIST_T1)
{
- if (!TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+ if (!strategy_hint_vacuum)
{
- STRAT_LIST_REMOVE(cdb);
- STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ if (!cdb->t1_vacuum &&
+ !TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ }
+ else
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T1);
+ /*
+ * If a non-VACUUM process references a page recently loaded
+ * by VACUUM, clear the stigma; the state will now be the
+ * same as if this process loaded it originally.
+ */
+ if (cdb->t1_vacuum)
+ {
+ cdb->t1_xid = GetCurrentTransactionId();
+ cdb->t1_vacuum = false;
+ }
+ }
}
return &BufferDescriptors[cdb->buf_id];
@@ -292,17 +290,19 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
/*
* In the case of a recheck we don't care about B1 or B2 hits here.
- * The bufmgr does this call only to make sure noone faulted in the
- * block while we where busy flushing another. Now for this really
- * to end up as a B1 or B2 cache hit, we must have been flushing for
- * quite some time as the block not only must have been read, but
- * also traveled through the queue and evicted from the T cache again
- * already.
+ * The bufmgr does this call only to make sure no-one faulted in the
+ * block while we were busy flushing another; we don't want to doubly
+ * adjust the T1target.
+ *
+ * Now for this really to end up as a B1 or B2 cache hit, we must have
+ * been flushing for quite some time as the block not only must have been
+ * read, but also traveled through the queue and evicted from the T cache
+ * again already.
+ *
+ * VACUUM re-reads shouldn't adjust the target either.
*/
- if (recheck)
- {
+ if (recheck || strategy_hint_vacuum)
return NULL;
- }
/*
* Adjust the target size of the T1 cache depending on if this is
@@ -316,8 +316,8 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* small. Adjust the T1 target size and continue
* below.
*/
- T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1),
- Data_Descriptors);
+ T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1),
+ NBuffers);
break;
case STRAT_LIST_B2:
@@ -325,26 +325,17 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* B2 hit means that the T2 cache is probably too
* small. Adjust the T1 target size and continue
* below.
- */
- T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0);
+ */
+ T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0);
break;
default:
- elog(ERROR, "Buffer hash table corrupted - CDB on list %d found",
- cdb->list);
+ elog(ERROR, "buffer hash table corrupted: CDB->list = %d",
+ cdb->list);
}
/*
- * Decide where to take from if we will be out of
- * free blocks later in StrategyGetBuffer().
- */
- if (T1_LENGTH >= MAX(1, T1_TARGET))
- strategy_get_from = STRAT_LIST_T1;
- else
- strategy_get_from = STRAT_LIST_T2;
-
- /*
- * Even if we had seen the block in the past, it's data is
+ * Even though we had seen the block in the past, its data is
* not currently in memory ... cache miss to the bufmgr.
*/
return NULL;
@@ -357,18 +348,25 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* Called by the bufmgr to get the next candidate buffer to use in
* BufferAlloc(). The only hard requirement BufferAlloc() has is that
* this buffer must not currently be pinned.
+ *
+ * *cdb_replace_index is set to the index of the candidate CDB, or -1 if
+ * none (meaning we are using a previously free buffer). This is not
+ * intended to be used by the caller, except to pass to
+ * StrategyReplaceBuffer().
*/
BufferDesc *
-StrategyGetBuffer(void)
+StrategyGetBuffer(int *cdb_replace_index)
{
int cdb_id;
BufferDesc *buf;
if (StrategyControl->listFreeBuffers < 0)
{
- /* We don't have a free buffer, must take one from T1 or T2 */
-
- if (strategy_get_from == STRAT_LIST_T1)
+ /*
+ * We don't have a free buffer, must take one from T1 or T2.
+ * Choose based on trying to converge T1len to T1target.
+ */
+ if (T1_LENGTH >= Max(1, T1_TARGET))
{
/*
* We should take the first unpinned buffer from T1.
@@ -379,7 +377,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
return buf;
}
@@ -387,7 +385,7 @@ StrategyGetBuffer(void)
}
/*
- * No unpinned T1 buffer found - pardon T2 cache.
+ * No unpinned T1 buffer found - try T2 cache.
*/
cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
while (cdb_id >= 0)
@@ -395,7 +393,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
return buf;
}
@@ -405,7 +403,7 @@ StrategyGetBuffer(void)
/*
* No unpinned buffers at all!!!
*/
- elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
+ elog(ERROR, "no unpinned buffers available");
}
else
{
@@ -418,7 +416,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
return buf;
}
@@ -426,7 +424,7 @@ StrategyGetBuffer(void)
}
/*
- * No unpinned T2 buffer found - pardon T1 cache.
+ * No unpinned T2 buffer found - try T1 cache.
*/
cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
while (cdb_id >= 0)
@@ -434,7 +432,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
return buf;
}
@@ -444,7 +442,7 @@ StrategyGetBuffer(void)
/*
* No unpinned buffers at all!!!
*/
- elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
+ elog(ERROR, "no unpinned buffers available");
}
}
else
@@ -459,13 +457,13 @@ StrategyGetBuffer(void)
* that there will never be any reason to recheck. Otherwise
* we would leak shared buffers here!
*/
- strategy_cdb_replace = -1;
+ *cdb_replace_index = -1;
buf = &BufferDescriptors[StrategyControl->listFreeBuffers];
StrategyControl->listFreeBuffers = buf->bufNext;
buf->bufNext = -1;
- /* Buffer of freelist cannot be pinned */
+ /* Buffer in freelist cannot be pinned */
Assert(buf->refcount == 0);
Assert(!(buf->flags & BM_DIRTY));
@@ -480,54 +478,59 @@ StrategyGetBuffer(void)
/*
* StrategyReplaceBuffer
*
- * Called by the buffer manager to inform us that he possibly flushed
- * a buffer and is now about to replace the content. Prior to this call,
+ * Called by the buffer manager to inform us that he flushed a buffer
+ * and is now about to replace the content. Prior to this call,
* the cache algorithm still reports the buffer as in the cache. After
* this call we report the new block, even if IO might still need to
- * start.
+ * be done to bring in the new content.
+ *
+ * cdb_found_index and cdb_replace_index must be the auxiliary values
+ * returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
*/
void
-StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
+StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
+ int cdb_found_index, int cdb_replace_index)
{
BufferStrategyCDB *cdb_found;
BufferStrategyCDB *cdb_replace;
- if (strategy_cdb_found >= 0)
+ if (cdb_found_index >= 0)
{
- /* This was a ghost buffer cache hit (B1 or B2) */
- cdb_found = &StrategyCDB[strategy_cdb_found];
+ /* This must have been a ghost buffer cache hit (B1 or B2) */
+ cdb_found = &StrategyCDB[cdb_found_index];
/* Assert that the buffer remembered in cdb_found is the one */
/* the buffer manager is currently faulting in */
- Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum));
+ Assert(BUFFERTAGS_EQUAL(&(cdb_found->buf_tag), newTag));
- if (strategy_cdb_replace >= 0)
+ if (cdb_replace_index >= 0)
{
/* We are satisfying it with an evicted T buffer */
- cdb_replace = &StrategyCDB[strategy_cdb_replace];
+ cdb_replace = &StrategyCDB[cdb_replace_index];
/* Assert that the buffer remembered in cdb_replace is */
/* the one the buffer manager has just evicted */
Assert(cdb_replace->list == STRAT_LIST_T1 ||
- cdb_replace->list == STRAT_LIST_T2);
+ cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
- /* If this was a T1 buffer faulted in by vacuum, just */
- /* do not cause the CDB end up in the B1 list, so that */
- /* the vacuum scan does not affect T1_target adjusting */
- if (strategy_hint_vacuum)
+ /*
+ * Under normal circumstances we move the evicted T list entry to
+ * the corresponding B list. However, T1 entries that exist only
+ * because of VACUUM are just thrown into the unused list instead.
+ * We don't expect them to be touched again by the VACUUM, and if
+ * we put them into B1 then VACUUM would skew T1_target adjusting.
+ */
+ if (cdb_replace->t1_vacuum)
{
BufTableDelete(&(cdb_replace->buf_tag));
STRAT_LIST_REMOVE(cdb_replace);
- cdb_replace->buf_id = -1;
cdb_replace->next = StrategyControl->listUnusedCDB;
- StrategyControl->listUnusedCDB = strategy_cdb_replace;
+ StrategyControl->listUnusedCDB = cdb_replace_index;
}
else
{
- /* Under normal circumstances move the evicted */
- /* T list entry to it's corresponding B list */
if (cdb_replace->list == STRAT_LIST_T1)
{
STRAT_LIST_REMOVE(cdb_replace);
@@ -539,25 +542,26 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
}
}
- /* And clear it's block reference */
+ /* And clear its block reference */
cdb_replace->buf_id = -1;
}
else
{
- /* or we satisfy it with an unused buffer */
+ /* We are satisfying it with an unused buffer */
}
- /* Now the found B CDB get's the buffer and is moved to T2 */
+ /* Now the found B CDB gets the buffer and is moved to T2 */
cdb_found->buf_id = buf->buf_id;
STRAT_LIST_REMOVE(cdb_found);
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
}
else
{
- /* This was a complete cache miss, so we need to create */
- /* a new CDB. The goal is to keep T1len+B1len <= c */
-
- if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= Data_Descriptors)
+ /*
+ * This was a complete cache miss, so we need to create
+ * a new CDB. The goal is to keep T1len+B1len <= c.
+ */
+ if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers)
{
/* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */
cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
@@ -587,18 +591,20 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
}
}
- /* Set the CDB's buf_tag and insert the hash key */
- INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum);
+ /* Set the CDB's buf_tag and insert it into the hash table */
+ cdb_found->buf_tag = *newTag;
BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));
- if (strategy_cdb_replace >= 0)
+ if (cdb_replace_index >= 0)
{
- /* The buffer was formerly in a T list, move it's CDB
- * to the corresponding B list */
- cdb_replace = &StrategyCDB[strategy_cdb_replace];
+ /*
+ * The buffer was formerly in a T list, move its CDB
+ * to the corresponding B list
+ */
+ cdb_replace = &StrategyCDB[cdb_replace_index];
Assert(cdb_replace->list == STRAT_LIST_T1 ||
- cdb_replace->list == STRAT_LIST_T2);
+ cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
@@ -612,32 +618,32 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
STRAT_LIST_REMOVE(cdb_replace);
STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
}
- /* And clear it's block reference */
+ /* And clear its block reference */
cdb_replace->buf_id = -1;
}
else
{
- /* or we satisfy it with an unused buffer */
+ /* We are satisfying it with an unused buffer */
}
/* Assign the buffer id to the new CDB */
cdb_found->buf_id = buf->buf_id;
/*
- * Specialized VACUUM optimization. If this "complete cache miss"
- * happened because vacuum needed the page, we want it later on
- * to be placed at the LRU instead of the MRU position of T1.
+ * Specialized VACUUM optimization. If this complete cache miss
+ * happened because vacuum needed the page, we place it at the LRU
+ * position of T1; normally it goes at the MRU position.
*/
if (strategy_hint_vacuum)
{
- if (strategy_vacuum_xid != GetCurrentTransactionId())
+ if (TransactionIdIsCurrentTransactionId(strategy_vacuum_xid))
+ STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
+ else
{
+ /* VACUUM must have been aborted by error, reset flag */
strategy_hint_vacuum = false;
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
}
- else
- STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
-
}
else
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
@@ -645,8 +651,10 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
/*
* Remember the Xid when this buffer went onto T1 to avoid
* a single UPDATE promoting a newcomer straight into T2.
+ * Also remember if it was loaded for VACUUM.
*/
cdb_found->t1_xid = GetCurrentTransactionId();
+ cdb_found->t1_vacuum = strategy_hint_vacuum;
}
}
@@ -673,8 +681,7 @@ StrategyInvalidateBuffer(BufferDesc *buf)
*/
cdb_id = BufTableLookup(&(buf->tag));
if (cdb_id < 0)
- elog(ERROR, "StrategyInvalidateBuffer() buffer %d not in directory",
- buf->buf_id);
+ elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id);
cdb = &StrategyCDB[cdb_id];
/*
@@ -694,7 +701,7 @@ StrategyInvalidateBuffer(BufferDesc *buf)
StrategyControl->listUnusedCDB = cdb_id;
/*
- * Clear out the buffers tag and add it to the list of
+ * Clear out the buffer's tag and add it to the list of
* currently unused buffers.
*/
CLEAR_BUFFERTAG(&(buf->tag));
@@ -702,7 +709,9 @@ StrategyInvalidateBuffer(BufferDesc *buf)
StrategyControl->listFreeBuffers = buf->buf_id;
}
-
+/*
+ * StrategyHintVacuum -- tell us whether VACUUM is active
+ */
void
StrategyHintVacuum(bool vacuum_active)
{
@@ -710,9 +719,24 @@ StrategyHintVacuum(bool vacuum_active)
strategy_vacuum_xid = GetCurrentTransactionId();
}
-
+/*
+ * StrategyDirtyBufferList
+ *
+ * Returns a list of dirty buffers, in priority order for writing.
+ * Note that the caller may choose not to write them all.
+ *
+ * The caller must beware of the possibility that a buffer is no longer dirty,
+ * or even contains a different page, by the time he reaches it. If it no
+ * longer contains the same page it need not be written, even if it is (again)
+ * dirty.
+ *
+ * Buffer pointers are stored into buffers[], and corresponding tags into
+ * buftags[], both of size max_buffers. The function returns the number of
+ * entries stored in each array.
+ */
int
-StrategyDirtyBufferList(int *buffer_list, int max_buffers)
+StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
+ int max_buffers)
{
int num_buffer_dirty = 0;
int cdb_id_t1;
@@ -724,13 +748,13 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
* Traverse the T1 and T2 list LRU to MRU in "parallel"
* and add all dirty buffers found in that order to the list.
* The ARC strategy keeps all used buffers including pinned ones
- * in the T1 or T2 list. So we cannot loose any dirty buffers.
+ * in the T1 or T2 list. So we cannot miss any dirty buffers.
*/
cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1];
cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2];
while ((cdb_id_t1 >= 0 || cdb_id_t2 >= 0) &&
- num_buffer_dirty < max_buffers)
+ num_buffer_dirty < max_buffers)
{
if (cdb_id_t1 >= 0)
{
@@ -741,7 +765,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
{
if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
{
- buffer_list[num_buffer_dirty++] = buf_id;
+ buffers[num_buffer_dirty] = buf;
+ buftags[num_buffer_dirty] = buf->tag;
+ num_buffer_dirty++;
}
}
@@ -757,7 +783,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
{
if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
{
- buffer_list[num_buffer_dirty++] = buf_id;
+ buffers[num_buffer_dirty] = buf;
+ buftags[num_buffer_dirty] = buf->tag;
+ num_buffer_dirty++;
}
}
@@ -785,16 +813,16 @@ StrategyInitialize(bool init)
/*
* Initialize the shared CDB lookup hashtable
*/
- InitBufTable(Data_Descriptors * 2);
+ InitBufTable(NBuffers * 2);
/*
* Get or create the shared strategy control block and the CDB's
*/
StrategyControl = (BufferStrategyControl *)
- ShmemInitStruct("Buffer Strategy Status",
- sizeof(BufferStrategyControl) +
- sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1),
- &found);
+ ShmemInitStruct("Buffer Strategy Status",
+ sizeof(BufferStrategyControl) +
+ sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1),
+ &found);
StrategyCDB = &(StrategyControl->cdb[0]);
if (!found)
@@ -805,8 +833,8 @@ StrategyInitialize(bool init)
Assert(init);
/*
- * Grab the whole linked list of free buffers for our
- * strategy
+ * Grab the whole linked list of free buffers for our strategy.
+ * We assume it was previously set up by InitBufferPool().
*/
StrategyControl->listFreeBuffers = 0;
@@ -814,7 +842,7 @@ StrategyInitialize(bool init)
* We start off with a target T1 list size of
* half the available cache blocks.
*/
- StrategyControl->target_T1_size = Data_Descriptors / 2;
+ StrategyControl->target_T1_size = NBuffers / 2;
/*
* Initialize B1, T1, T2 and B2 lists to be empty
@@ -832,14 +860,14 @@ StrategyInitialize(bool init)
/*
* All CDB's are linked as the listUnusedCDB
*/
- for (i = 0; i < Data_Descriptors * 2; i++)
+ for (i = 0; i < NBuffers * 2; i++)
{
StrategyCDB[i].next = i + 1;
StrategyCDB[i].list = STRAT_LIST_UNUSED;
CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag));
StrategyCDB[i].buf_id = -1;
}
- StrategyCDB[Data_Descriptors * 2 - 1].next = -1;
+ StrategyCDB[NBuffers * 2 - 1].next = -1;
StrategyControl->listUnusedCDB = 0;
}
else
@@ -847,91 +875,3 @@ StrategyInitialize(bool init)
Assert(!init);
}
}
-
-
-#undef PinBuffer
-
-/*
- * PinBuffer -- make buffer unavailable for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- */
-void
-PinBuffer(BufferDesc *buf)
-{
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- if (PrivateRefCount[b] == 0)
- buf->refcount++;
- PrivateRefCount[b]++;
- Assert(PrivateRefCount[b] > 0);
-}
-
-#ifdef NOT_USED
-void
-PinBuffer_Debug(char *file, int line, BufferDesc *buf)
-{
- PinBuffer(buf);
- if (ShowPinTrace)
- {
- Buffer buffer = BufferDescriptorGetBuffer(buf);
-
- fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
-refcount = %ld, file: %s, line: %d\n",
- buffer, buf->blind.relname, buf->tag.blockNum,
- PrivateRefCount[buffer - 1], file, line);
- }
-}
-#endif
-
-#undef UnpinBuffer
-
-/*
- * UnpinBuffer -- make buffer available for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- */
-void
-UnpinBuffer(BufferDesc *buf)
-{
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- Assert(buf->refcount > 0);
- Assert(PrivateRefCount[b] > 0);
- PrivateRefCount[b]--;
- if (PrivateRefCount[b] == 0)
- buf->refcount--;
-
- if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
- buf->refcount == 1)
- {
- /* we just released the last pin other than the waiter's */
- buf->flags &= ~BM_PIN_COUNT_WAITER;
- ProcSendSignal(buf->wait_backend_id);
- }
- else
- {
- /* do nothing */
- }
-}
-
-#ifdef NOT_USED
-void
-UnpinBuffer_Debug(char *file, int line, BufferDesc *buf)
-{
- UnpinBuffer(buf);
- if (ShowPinTrace)
- {
- Buffer buffer = BufferDescriptorGetBuffer(buf);
-
- fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
-refcount = %ld, file: %s, line: %d\n",
- buffer, buf->blind.relname, buf->tag.blockNum,
- PrivateRefCount[buffer - 1], file, line);
- }
-}
-#endif
-
-