summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2008-09-30 10:52:14 +0000
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2008-09-30 10:52:14 +0000
commit15c121b3ed7eb2f290e19533e41ccca734d23574 (patch)
treeb60226d720f87b82b5b44647e3d3031081cdfb07 /src/include
parent2dbc0ca937f8ba9c76866a99fd04866232acea95 (diff)
downloadpostgresql-15c121b3ed7eb2f290e19533e41ccca734d23574.tar.gz
Rewrite the FSM. Instead of relying on a fixed-size shared memory segment, the
free space information is stored in a dedicated FSM relation fork, with each relation (except for hash indexes; they don't use FSM). This eliminates the max_fsm_relations and max_fsm_pages GUC options; remove any trace of them from the backend, initdb, and documentation. Rewrite contrib/pg_freespacemap to match the new FSM implementation. Also introduce a new variant of the get_raw_page(regclass, int4, int4) function in contrib/pageinspect that lets you return pages from any relation fork, and a new fsm_page_contents() function to inspect the new FSM pages.
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/rmgr.h3
-rw-r--r--src/include/storage/freespace.h150
-rw-r--r--src/include/storage/fsm_internals.h73
-rw-r--r--src/include/storage/indexfsm.h27
-rw-r--r--src/include/storage/lwlock.h8
-rw-r--r--src/include/storage/relfilenode.h9
-rw-r--r--src/include/utils/guc_tables.h3
-rw-r--r--src/include/utils/rel.h5
8 files changed, 133 insertions, 145 deletions
diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h
index 7be2dfc9f6..6f018f0bee 100644
--- a/src/include/access/rmgr.h
+++ b/src/include/access/rmgr.h
@@ -3,7 +3,7 @@
*
* Resource managers definition
*
- * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.17 2006/11/05 22:42:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.18 2008/09/30 10:52:13 heikki Exp $
*/
#ifndef RMGR_H
#define RMGR_H
@@ -23,6 +23,7 @@ typedef uint8 RmgrId;
#define RM_DBASE_ID 4
#define RM_TBLSPC_ID 5
#define RM_MULTIXACT_ID 6
+#define RM_FREESPACE_ID 7
#define RM_HEAP2_ID 9
#define RM_HEAP_ID 10
#define RM_BTREE_ID 11
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 86dd22647c..d417e8c980 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -7,152 +7,32 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.28 2008/03/10 02:04:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.29 2008/09/30 10:52:13 heikki Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef FREESPACE_H_
#define FREESPACE_H_
-#include "storage/relfilenode.h"
-#include "storage/itemptr.h"
+#include "utils/rel.h"
+#include "storage/bufpage.h"
+#include "access/xlog.h"
-
-/* Initial value for average-request moving average */
-#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32))
-
-/*
- * Number of pages and bytes per allocation chunk. Indexes can squeeze 50%
- * more pages into the same space because they don't need to remember how much
- * free space on each page. The nominal number of pages, CHUNKPAGES, is for
- * regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be
- * even so that no space is wasted in the index case.
- */
-#define CHUNKPAGES 16
-#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))
-#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))
-
-
-/*
- * Typedefs and macros for items in the page-storage arena. We use the
- * existing ItemPointer and BlockId data structures, which are designed
- * to pack well (they should be 6 and 4 bytes apiece regardless of machine
- * alignment issues). Unfortunately we can't use the ItemPointer access
- * macros, because they include Asserts insisting that ip_posid != 0.
- */
-typedef ItemPointerData FSMPageData;
-typedef BlockIdData IndexFSMPageData;
-
-#define FSMPageGetPageNum(ptr) \
- BlockIdGetBlockNumber(&(ptr)->ip_blkid)
-#define FSMPageGetSpace(ptr) \
- ((Size) (ptr)->ip_posid)
-#define FSMPageSetPageNum(ptr, pg) \
- BlockIdSet(&(ptr)->ip_blkid, pg)
-#define FSMPageSetSpace(ptr, sz) \
- ((ptr)->ip_posid = (OffsetNumber) (sz))
-#define IndexFSMPageGetPageNum(ptr) \
- BlockIdGetBlockNumber(ptr)
-#define IndexFSMPageSetPageNum(ptr, pg) \
- BlockIdSet(ptr, pg)
-
-/*
- * Shared free-space-map objects
- *
- * The per-relation objects are indexed by a hash table, and are also members
- * of two linked lists: one ordered by recency of usage (most recent first),
- * and the other ordered by physical location of the associated storage in
- * the page-info arena.
- *
- * Each relation owns one or more chunks of per-page storage in the "arena".
- * The chunks for each relation are always consecutive, so that it can treat
- * its page storage as a simple array. We further insist that its page data
- * be ordered by block number, so that binary search is possible.
- *
- * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
- * This assumes that all processes accessing the map will have the shared
- * memory segment mapped at the same place in their address space.
- */
-typedef struct FSMHeader FSMHeader;
-typedef struct FSMRelation FSMRelation;
-
-/* Header for whole map */
-struct FSMHeader
-{
- FSMRelation *usageList; /* FSMRelations in usage-recency order */
- FSMRelation *usageListTail; /* tail of usage-recency list */
- FSMRelation *firstRel; /* FSMRelations in arena storage order */
- FSMRelation *lastRel; /* tail of storage-order list */
- int numRels; /* number of FSMRelations now in use */
- double sumRequests; /* sum of requested chunks over all rels */
- char *arena; /* arena for page-info storage */
- int totalChunks; /* total size of arena, in chunks */
- int usedChunks; /* # of chunks assigned */
- /* NB: there are totalChunks - usedChunks free chunks at end of arena */
-};
-
-/*
- * Per-relation struct --- this is an entry in the shared hash table.
- * The hash key is the RelFileNode value (hence, we look at the physical
- * relation ID, not the logical ID, which is appropriate).
- */
-struct FSMRelation
-{
- RelFileNode key; /* hash key (must be first) */
- FSMRelation *nextUsage; /* next rel in usage-recency order */
- FSMRelation *priorUsage; /* prior rel in usage-recency order */
- FSMRelation *nextPhysical; /* next rel in arena-storage order */
- FSMRelation *priorPhysical; /* prior rel in arena-storage order */
- bool isIndex; /* if true, we store only page numbers */
- Size avgRequest; /* moving average of space requests */
- BlockNumber interestingPages; /* # of pages with useful free space */
- int firstChunk; /* chunk # of my first chunk in arena */
- int storedPages; /* # of pages stored in arena */
- int nextPage; /* index (from 0) to start next search at */
-};
-
-
-
-/* GUC variables */
-extern PGDLLIMPORT int MaxFSMRelations;
-extern PGDLLIMPORT int MaxFSMPages;
-
-
-/*
- * function prototypes
- */
-extern void InitFreeSpaceMap(void);
-extern Size FreeSpaceShmemSize(void);
-extern FSMHeader *GetFreeSpaceMap(void);
-
-extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded);
-extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
+/* prototypes for public functions in freespace.c */
+extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
+extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
+extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
BlockNumber oldPage,
Size oldSpaceAvail,
Size spaceNeeded);
-extern Size GetAvgFSMRequestSize(RelFileNode *rel);
-extern void RecordRelationFreeSpace(RelFileNode *rel,
- BlockNumber interestingPages,
- int nPages,
- FSMPageData *pageSpaces);
-
-extern BlockNumber GetFreeIndexPage(RelFileNode *rel);
-extern void RecordIndexFreeSpace(RelFileNode *rel,
- BlockNumber interestingPages,
- int nPages,
- BlockNumber *pages);
-
-extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks);
-extern void FreeSpaceMapForgetRel(RelFileNode *rel);
-extern void FreeSpaceMapForgetDatabase(Oid dbid);
-
-extern void PrintFreeSpaceMapStatistics(int elevel);
+extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
+ Size spaceAvail);
-extern void DumpFreeSpaceMap(int code, Datum arg);
-extern void LoadFreeSpaceMap(void);
+extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
+extern void FreeSpaceMapVacuum(Relation rel);
-#ifdef FREESPACE_DEBUG
-extern void DumpFreeSpace(void);
-#endif
+/* WAL prototypes */
+extern void fsm_desc(StringInfo buf, uint8 xl_info, char *rec);
+extern void fsm_redo(XLogRecPtr lsn, XLogRecord *record);
#endif /* FREESPACE_H */
diff --git a/src/include/storage/fsm_internals.h b/src/include/storage/fsm_internals.h
new file mode 100644
index 0000000000..e7fbbf2b9b
--- /dev/null
+++ b/src/include/storage/fsm_internals.h
@@ -0,0 +1,73 @@
+/*-------------------------------------------------------------------------
+ *
+ * fsm_internals.h
+ * internal functions for free space map
+ *
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/storage/fsm_internals.h,v 1.1 2008/09/30 10:52:14 heikki Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FSM_INTERNALS_H
+#define FSM_INTERNALS_H
+
+#include "storage/buf.h"
+#include "storage/bufpage.h"
+#include "lib/stringinfo.h"
+
+/*
+ * Structure of a FSM page. See src/backend/storage/freespace/README for
+ * details.
+ */
+typedef struct
+{
+ /*
+ * fsm_search_avail() tries to spread the load of multiple backends
+ * by returning different pages to different backends in a round-robin
+ * fashion. fp_next_slot points to the next slot to be returned
+ * (assuming there's enough space on it for the request). It's defined
+ * as an int, because it's updated without an exclusive lock. uint16
+ * would be more appropriate, but int is more likely to be atomically
+ * fetchable/storable.
+ */
+ int fp_next_slot;
+
+ /*
+ * fp_nodes contains the binary tree, stored in an array. The first
+ * NonLeafNodesPerPage elements are upper nodes, and the following
+ * LeafNodesPerPage elements are leaf nodes. Unused nodes are zero.
+ */
+ uint8 fp_nodes[1];
+} FSMPageData;
+
+typedef FSMPageData *FSMPage;
+
+/*
+ * Number of non-leaf and leaf nodes, and nodes in total, on an FSM page.
+ * These definitions are internal to fsmpage.c.
+ */
+#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
+ offsetof(FSMPageData, fp_nodes))
+
+#define NonLeafNodesPerPage (BLCKSZ / 2 - 1)
+#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage)
+
+/*
+ * Number of FSM "slots" on a FSM page. This is what should be used
+ * outside fsmpage.c.
+ */
+#define SlotsPerFSMPage LeafNodesPerPage
+
+/* Prototypes for functions in fsmpage.c */
+extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext,
+ bool exclusive_lock_held);
+extern uint8 fsm_get_avail(Page page, int slot);
+extern uint8 fsm_get_max_avail(Page page);
+extern bool fsm_set_avail(Page page, int slot, uint8 value);
+extern bool fsm_truncate_avail(Page page, int nslots);
+extern bool fsm_rebuild_page(Page page);
+
+#endif /* FSM_INTERNALS_H */
diff --git a/src/include/storage/indexfsm.h b/src/include/storage/indexfsm.h
new file mode 100644
index 0000000000..76bb26f7bc
--- /dev/null
+++ b/src/include/storage/indexfsm.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexfsm.h
+ * POSTGRES free space map for quickly finding an unused page in index
+ *
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/storage/indexfsm.h,v 1.1 2008/09/30 10:52:14 heikki Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef INDEXFSM_H_
+#define INDEXFSM_H_
+
+#include "utils/rel.h"
+
+extern void InitIndexFreeSpaceMap(Relation rel);
+
+extern BlockNumber GetFreeIndexPage(Relation rel);
+extern void RecordFreeIndexPage(Relation rel, BlockNumber page);
+extern void RecordUsedIndexPage(Relation rel, BlockNumber page);
+
+extern void IndexFreeSpaceMapTruncate(Relation rel, BlockNumber nblocks);
+
+#endif /* INDEXFSM_H_ */
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index b1088fcd33..5f993fa2ba 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.39 2008/06/19 21:32:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.40 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -35,6 +35,10 @@
* by allowing values not listed in the enum declaration to be assigned.
* The extra value MaxDynamicLWLock is there to keep the compiler from
* deciding that the enum can be represented as char or short ...
+ *
+ * If you remove a lock, please replace it with a placeholder, as was done
+ * for FreeSpaceLock. This retains the lock numbering, which is helpful for
+ * DTrace and other external debugging scripts.
*/
typedef enum LWLockId
{
@@ -45,7 +49,7 @@ typedef enum LWLockId
ProcArrayLock,
SInvalReadLock,
SInvalWriteLock,
- FreeSpaceLock,
+ UnusedLock1, /* FreeSpaceLock used to be here */
WALInsertLock,
WALWriteLock,
ControlFileLock,
diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h
index 8ac8147ed9..571f261c3d 100644
--- a/src/include/storage/relfilenode.h
+++ b/src/include/storage/relfilenode.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.17 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,11 +23,12 @@
typedef enum ForkNumber
{
InvalidForkNumber = -1,
- MAIN_FORKNUM = 0
- /* NOTE: change NUM_FORKS below when you add new forks */
+ MAIN_FORKNUM = 0,
+ FSM_FORKNUM
+ /* NOTE: change MAX_FORKNUM below when you add new forks */
} ForkNumber;
-#define MAX_FORKNUM MAIN_FORKNUM
+#define MAX_FORKNUM FSM_FORKNUM
/*
* RelFileNode must provide all that we need to know to physically access
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 436b003286..0eca0f54a3 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -7,7 +7,7 @@
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.42 2008/09/10 18:09:20 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.43 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -49,7 +49,6 @@ enum config_group
CONN_AUTH_SECURITY,
RESOURCES,
RESOURCES_MEM,
- RESOURCES_FSM,
RESOURCES_KERNEL,
WAL,
WAL_SETTINGS,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 0d9d75dd8b..71ad936d27 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.107 2008/06/19 00:46:06 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.108 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -195,6 +195,9 @@ typedef struct RelationData
List *rd_indpred; /* index predicate tree, if any */
void *rd_amcache; /* available for use by index AM */
+ /* Cached last-seen size of the FSM */
+ BlockNumber rd_fsm_nblocks_cache;
+
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;