diff options
| author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-09-30 10:52:14 +0000 |
|---|---|---|
| committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-09-30 10:52:14 +0000 |
| commit | 15c121b3ed7eb2f290e19533e41ccca734d23574 (patch) | |
| tree | b60226d720f87b82b5b44647e3d3031081cdfb07 /src/include | |
| parent | 2dbc0ca937f8ba9c76866a99fd04866232acea95 (diff) | |
| download | postgresql-15c121b3ed7eb2f290e19533e41ccca734d23574.tar.gz | |
Rewrite the FSM. Instead of relying on a fixed-size shared memory segment, the
free space information is stored in a dedicated FSM relation fork alongside
each relation (except for hash indexes, which don't use the FSM).
This eliminates the max_fsm_relations and max_fsm_pages GUC options; remove any
trace of them from the backend, initdb, and documentation.
Rewrite contrib/pg_freespacemap to match the new FSM implementation. Also
introduce a new variant of the get_raw_page(regclass, int4, int4) function in
contrib/pageinspect that lets you return pages from any relation fork, and
a new fsm_page_contents() function to inspect the new FSM pages.
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/access/rmgr.h | 3 | ||||
| -rw-r--r-- | src/include/storage/freespace.h | 150 | ||||
| -rw-r--r-- | src/include/storage/fsm_internals.h | 73 | ||||
| -rw-r--r-- | src/include/storage/indexfsm.h | 27 | ||||
| -rw-r--r-- | src/include/storage/lwlock.h | 8 | ||||
| -rw-r--r-- | src/include/storage/relfilenode.h | 9 | ||||
| -rw-r--r-- | src/include/utils/guc_tables.h | 3 | ||||
| -rw-r--r-- | src/include/utils/rel.h | 5 |
8 files changed, 133 insertions, 145 deletions
diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h index 7be2dfc9f6..6f018f0bee 100644 --- a/src/include/access/rmgr.h +++ b/src/include/access/rmgr.h @@ -3,7 +3,7 @@ * * Resource managers definition * - * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.17 2006/11/05 22:42:10 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.18 2008/09/30 10:52:13 heikki Exp $ */ #ifndef RMGR_H #define RMGR_H @@ -23,6 +23,7 @@ typedef uint8 RmgrId; #define RM_DBASE_ID 4 #define RM_TBLSPC_ID 5 #define RM_MULTIXACT_ID 6 +#define RM_FREESPACE_ID 7 #define RM_HEAP2_ID 9 #define RM_HEAP_ID 10 #define RM_BTREE_ID 11 diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h index 86dd22647c..d417e8c980 100644 --- a/src/include/storage/freespace.h +++ b/src/include/storage/freespace.h @@ -7,152 +7,32 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.28 2008/03/10 02:04:10 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.29 2008/09/30 10:52:13 heikki Exp $ * *------------------------------------------------------------------------- */ #ifndef FREESPACE_H_ #define FREESPACE_H_ -#include "storage/relfilenode.h" -#include "storage/itemptr.h" +#include "utils/rel.h" +#include "storage/bufpage.h" +#include "access/xlog.h" - -/* Initial value for average-request moving average */ -#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32)) - -/* - * Number of pages and bytes per allocation chunk. Indexes can squeeze 50% - * more pages into the same space because they don't need to remember how much - * free space on each page. The nominal number of pages, CHUNKPAGES, is for - * regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be - * even so that no space is wasted in the index case. 
- */ -#define CHUNKPAGES 16 -#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData)) -#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData))) - - -/* - * Typedefs and macros for items in the page-storage arena. We use the - * existing ItemPointer and BlockId data structures, which are designed - * to pack well (they should be 6 and 4 bytes apiece regardless of machine - * alignment issues). Unfortunately we can't use the ItemPointer access - * macros, because they include Asserts insisting that ip_posid != 0. - */ -typedef ItemPointerData FSMPageData; -typedef BlockIdData IndexFSMPageData; - -#define FSMPageGetPageNum(ptr) \ - BlockIdGetBlockNumber(&(ptr)->ip_blkid) -#define FSMPageGetSpace(ptr) \ - ((Size) (ptr)->ip_posid) -#define FSMPageSetPageNum(ptr, pg) \ - BlockIdSet(&(ptr)->ip_blkid, pg) -#define FSMPageSetSpace(ptr, sz) \ - ((ptr)->ip_posid = (OffsetNumber) (sz)) -#define IndexFSMPageGetPageNum(ptr) \ - BlockIdGetBlockNumber(ptr) -#define IndexFSMPageSetPageNum(ptr, pg) \ - BlockIdSet(ptr, pg) - -/* - * Shared free-space-map objects - * - * The per-relation objects are indexed by a hash table, and are also members - * of two linked lists: one ordered by recency of usage (most recent first), - * and the other ordered by physical location of the associated storage in - * the page-info arena. - * - * Each relation owns one or more chunks of per-page storage in the "arena". - * The chunks for each relation are always consecutive, so that it can treat - * its page storage as a simple array. We further insist that its page data - * be ordered by block number, so that binary search is possible. - * - * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs. - * This assumes that all processes accessing the map will have the shared - * memory segment mapped at the same place in their address space. 
- */ -typedef struct FSMHeader FSMHeader; -typedef struct FSMRelation FSMRelation; - -/* Header for whole map */ -struct FSMHeader -{ - FSMRelation *usageList; /* FSMRelations in usage-recency order */ - FSMRelation *usageListTail; /* tail of usage-recency list */ - FSMRelation *firstRel; /* FSMRelations in arena storage order */ - FSMRelation *lastRel; /* tail of storage-order list */ - int numRels; /* number of FSMRelations now in use */ - double sumRequests; /* sum of requested chunks over all rels */ - char *arena; /* arena for page-info storage */ - int totalChunks; /* total size of arena, in chunks */ - int usedChunks; /* # of chunks assigned */ - /* NB: there are totalChunks - usedChunks free chunks at end of arena */ -}; - -/* - * Per-relation struct --- this is an entry in the shared hash table. - * The hash key is the RelFileNode value (hence, we look at the physical - * relation ID, not the logical ID, which is appropriate). - */ -struct FSMRelation -{ - RelFileNode key; /* hash key (must be first) */ - FSMRelation *nextUsage; /* next rel in usage-recency order */ - FSMRelation *priorUsage; /* prior rel in usage-recency order */ - FSMRelation *nextPhysical; /* next rel in arena-storage order */ - FSMRelation *priorPhysical; /* prior rel in arena-storage order */ - bool isIndex; /* if true, we store only page numbers */ - Size avgRequest; /* moving average of space requests */ - BlockNumber interestingPages; /* # of pages with useful free space */ - int firstChunk; /* chunk # of my first chunk in arena */ - int storedPages; /* # of pages stored in arena */ - int nextPage; /* index (from 0) to start next search at */ -}; - - - -/* GUC variables */ -extern PGDLLIMPORT int MaxFSMRelations; -extern PGDLLIMPORT int MaxFSMPages; - - -/* - * function prototypes - */ -extern void InitFreeSpaceMap(void); -extern Size FreeSpaceShmemSize(void); -extern FSMHeader *GetFreeSpaceMap(void); - -extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded); 
-extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel, +/* prototypes for public functions in freespace.c */ +extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk); +extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded); +extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded); -extern Size GetAvgFSMRequestSize(RelFileNode *rel); -extern void RecordRelationFreeSpace(RelFileNode *rel, - BlockNumber interestingPages, - int nPages, - FSMPageData *pageSpaces); - -extern BlockNumber GetFreeIndexPage(RelFileNode *rel); -extern void RecordIndexFreeSpace(RelFileNode *rel, - BlockNumber interestingPages, - int nPages, - BlockNumber *pages); - -extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks); -extern void FreeSpaceMapForgetRel(RelFileNode *rel); -extern void FreeSpaceMapForgetDatabase(Oid dbid); - -extern void PrintFreeSpaceMapStatistics(int elevel); +extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, + Size spaceAvail); -extern void DumpFreeSpaceMap(int code, Datum arg); -extern void LoadFreeSpaceMap(void); +extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks); +extern void FreeSpaceMapVacuum(Relation rel); -#ifdef FREESPACE_DEBUG -extern void DumpFreeSpace(void); -#endif +/* WAL prototypes */ +extern void fsm_desc(StringInfo buf, uint8 xl_info, char *rec); +extern void fsm_redo(XLogRecPtr lsn, XLogRecord *record); #endif /* FREESPACE_H */ diff --git a/src/include/storage/fsm_internals.h b/src/include/storage/fsm_internals.h new file mode 100644 index 0000000000..e7fbbf2b9b --- /dev/null +++ b/src/include/storage/fsm_internals.h @@ -0,0 +1,73 @@ +/*------------------------------------------------------------------------- + * + * fsm_internal.h + * internal functions for free space map + * + * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, 
Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/storage/fsm_internals.h,v 1.1 2008/09/30 10:52:14 heikki Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef FSM_INTERNALS_H +#define FSM_INTERNALS_H + +#include "storage/buf.h" +#include "storage/bufpage.h" +#include "lib/stringinfo.h" + +/* + * Structure of a FSM page. See src/backend/storage/freespace/README for + * details. + */ +typedef struct +{ + /* + * fsm_search_avail() tries to spread the load of multiple backends + * by returning different pages to different backends in a round-robin + * fashion. fp_next_slot points to the next slot to be returned + * (assuming there's enough space on it for the request). It's defined + * as an int, because it's updated without an exclusive lock. uint16 + * would be more appropriate, but int is more likely to be atomically + * fetchable/storable. + */ + int fp_next_slot; + + /* + * fp_nodes contains the binary tree, stored in array. The first + * NonLeafNodesPerPage elements are upper nodes, and the following + * LeafNodesPerPage elements are leaf nodes. Unused nodes are zero. + */ + uint8 fp_nodes[1]; +} FSMPageData; + +typedef FSMPageData *FSMPage; + +/* + * Number of non-leaf and leaf nodes, and nodes in total, on an FSM page. + * These definitions are internal to fsmpage.c. + */ +#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \ + offsetof(FSMPageData, fp_nodes)) + +#define NonLeafNodesPerPage (BLCKSZ / 2 - 1) +#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage) + +/* + * Number of FSM "slots" on a FSM page. This is what should be used + * outside fsmpage.c. 
+ */ +#define SlotsPerFSMPage LeafNodesPerPage + +/* Prototypes for functions in fsmpage.c */ +extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext, + bool exclusive_lock_held); +extern uint8 fsm_get_avail(Page page, int slot); +extern uint8 fsm_get_max_avail(Page page); +extern bool fsm_set_avail(Page page, int slot, uint8 value); +extern bool fsm_truncate_avail(Page page, int nslots); +extern bool fsm_rebuild_page(Page page); + +#endif /* FSM_INTERNALS_H */ diff --git a/src/include/storage/indexfsm.h b/src/include/storage/indexfsm.h new file mode 100644 index 0000000000..76bb26f7bc --- /dev/null +++ b/src/include/storage/indexfsm.h @@ -0,0 +1,27 @@ +/*------------------------------------------------------------------------- + * + * indexfsm.h + * POSTGRES free space map for quickly finding an unused page in index + * + * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/storage/indexfsm.h,v 1.1 2008/09/30 10:52:14 heikki Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef INDEXFSM_H_ +#define INDEXFSM_H_ + +#include "utils/rel.h" + +extern void InitIndexFreeSpaceMap(Relation rel); + +extern BlockNumber GetFreeIndexPage(Relation rel); +extern void RecordFreeIndexPage(Relation rel, BlockNumber page); +extern void RecordUsedIndexPage(Relation rel, BlockNumber page); + +extern void IndexFreeSpaceMapTruncate(Relation rel, BlockNumber nblocks); + +#endif /* INDEXFSM_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index b1088fcd33..5f993fa2ba 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.39 
2008/06/19 21:32:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.40 2008/09/30 10:52:14 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -35,6 +35,10 @@ * by allowing values not listed in the enum declaration to be assigned. * The extra value MaxDynamicLWLock is there to keep the compiler from * deciding that the enum can be represented as char or short ... + * + * If you remove a lock, please replace it with a placeholder like was done + * for FreeSpaceMapLock. This retains the lock numbering, which is helpful for + * DTrace and other external debugging scripts. */ typedef enum LWLockId { @@ -45,7 +49,7 @@ typedef enum LWLockId ProcArrayLock, SInvalReadLock, SInvalWriteLock, - FreeSpaceLock, + UnusedLock1, /* FreeSpaceMapLock used to be here */ WALInsertLock, WALWriteLock, ControlFileLock, diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 8ac8147ed9..571f261c3d 100644 --- a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $ + * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.17 2008/09/30 10:52:14 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -23,11 +23,12 @@ typedef enum ForkNumber { InvalidForkNumber = -1, - MAIN_FORKNUM = 0 - /* NOTE: change NUM_FORKS below when you add new forks */ + MAIN_FORKNUM = 0, + FSM_FORKNUM + /* NOTE: change MAX_FORKNUM below when you add new forks */ } ForkNumber; -#define MAX_FORKNUM MAIN_FORKNUM +#define MAX_FORKNUM FSM_FORKNUM /* * RelFileNode must provide all that we need to know to physically access diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 
436b003286..0eca0f54a3 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -7,7 +7,7 @@ * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.42 2008/09/10 18:09:20 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.43 2008/09/30 10:52:14 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -49,7 +49,6 @@ enum config_group CONN_AUTH_SECURITY, RESOURCES, RESOURCES_MEM, - RESOURCES_FSM, RESOURCES_KERNEL, WAL, WAL_SETTINGS, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 0d9d75dd8b..71ad936d27 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.107 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.108 2008/09/30 10:52:14 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -195,6 +195,9 @@ typedef struct RelationData List *rd_indpred; /* index predicate tree, if any */ void *rd_amcache; /* available for use by index AM */ + /* Cached last-seen size of the FSM */ + BlockNumber rd_fsm_nblocks_cache; + /* use "struct" here to avoid needing to include pgstat.h: */ struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ } RelationData; |
