diff options
| author | Robert Haas <rhaas@postgresql.org> | 2010-08-13 20:10:54 +0000 |
|---|---|---|
| committer | Robert Haas <rhaas@postgresql.org> | 2010-08-13 20:10:54 +0000 |
| commit | debcec7dc31a992703911a9953e299c8d730c778 (patch) | |
| tree | dad0d10ec39eafe0cc254c17e36eb82ec822cdac /src/backend/storage/smgr/md.c | |
| parent | 3f9479ef3fdf49fc22088be5268fa536cf5d4efd (diff) | |
| download | postgresql-debcec7dc31a992703911a9953e299c8d730c778.tar.gz | |
Include the backend ID in the relpath of temporary relations.
This allows us to reliably remove all leftover temporary relation
files on cluster startup without reference to system catalogs or WAL;
therefore, we no longer include temporary relations in XLOG_XACT_COMMIT
and XLOG_XACT_ABORT WAL records.
Since these changes require including a backend ID in each
SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id
field has been reduced from two bytes to one, and the maximum number
of connections has been reduced from INT_MAX / 4 to 2^23-1. It would
be possible to remove these restrictions by increasing the size of
SharedInvalidationMessage by 4 bytes, but right now that doesn't seem
like a good trade-off.
Review by Jaime Casanova and Tom Lane.
Diffstat (limited to 'src/backend/storage/smgr/md.c')
| -rw-r--r-- | src/backend/storage/smgr/md.c | 92 |
1 files changed, 49 insertions, 43 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index eb5c73d6f8..f1ff2fe15e 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ */ typedef struct { - RelFileNode rnode; /* the targeted relation */ + RelFileNodeBackend rnode; /* the targeted relation */ ForkNumber forknum; BlockNumber segno; /* which segment */ } PendingOperationTag; @@ -135,7 +135,7 @@ typedef struct typedef struct { - RelFileNode rnode; /* the dead relation to delete */ + RelFileNodeBackend rnode; /* the dead relation to delete */ CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */ } PendingUnlinkEntry; @@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior); static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); -static void register_unlink(RelFileNode rnode); +static void register_unlink(RelFileNodeBackend rnode); static MdfdVec *_fdvec_alloc(void); static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno); static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags); static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, - BlockNumber blkno, bool isTemp, ExtensionBehavior behavior); + BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior); static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); @@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * we are usually not in a transaction anymore when this is called. */ void -mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) +mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) { char *path; int ret; @@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) */ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rnode, forknum), InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); @@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); @@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, */ void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, #endif TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); } @@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * mdtruncate() -- Truncate relation to specified number of blocks. */ void -mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { MdfdVec *v; BlockNumber curnblk; @@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\": %m", FilePathName(v->mdfd_vfd)))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st @@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\" to %u blocks: %m", FilePathName(v->mdfd_vfd), nblocks))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; @@ -1052,7 +1055,8 @@ mdsync(void) * the relation will have been dirtied through this same smgr * relation, and so we can save a file open/close cycle. */ - reln = smgropen(entry->tag.rnode); + reln = smgropen(entry->tag.rnode.node, + entry->tag.rnode.backend); /* * It is possible that the relation has been dropped or @@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * a remote pending-ops table. */ static void -register_unlink(RelFileNode rnode) +register_unlink(RelFileNodeBackend rnode) { if (pendingOpsTable) { @@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode) * structure for them.) */ void -RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { Assert(pendingOpsTable); @@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (RelFileNodeEquals(entry->tag.rnode, rnode) && + if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) && entry->tag.forknum == forknum) { /* Okay, cancel this entry */ @@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (entry->tag.rnode.dbNode == rnode.dbNode) + if (entry->tag.rnode.node.dbNode == rnode.node.dbNode) { /* Okay, cancel this entry */ entry->canceled = true; @@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell); next = lnext(cell); - if (entry->rnode.dbNode == rnode.dbNode) + if (entry->rnode.node.dbNode == rnode.node.dbNode) { pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev); pfree(entry); @@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void -ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) +ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum) { if (pendingOpsTable) { @@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) void ForgetDatabaseFsyncRequests(Oid dbid) { - RelFileNode rnode; + RelFileNodeBackend rnode; - rnode.dbNode = dbid; - rnode.spcNode = 0; - rnode.relNode = 0; + rnode.node.dbNode = dbid; + rnode.node.spcNode = 0; + rnode.node.relNode = 0; + rnode.backend = InvalidBackendId; if (pendingOpsTable) { @@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * specified block. * * If the segment doesn't exist, we ereport, return NULL, or create the - * segment, according to "behavior". Note: isTemp need only be correct - * in the EXTENSION_CREATE case. + * segment, according to "behavior". Note: skipFsync is only used in the + * EXTENSION_CREATE case. */ static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, - bool isTemp, ExtensionBehavior behavior) + bool skipFsync, ExtensionBehavior behavior) { MdfdVec *v = mdopen(reln, forknum, behavior); BlockNumber targetseg; @@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend(reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, isTemp); + zerobuf, skipFsync); pfree(zerobuf); } v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); |
