summary | refs | log | tree | commit | diff
path: root/src/backend/storage/smgr/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/smgr/md.c')
-rw-r--r-- src/backend/storage/smgr/md.c | 299
1 files changed, 178 insertions, 121 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index acd669f1f7..a76fea454d 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.139 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,7 @@
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
#include "storage/bufmgr.h"
+#include "storage/relfilenode.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -118,6 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
typedef struct
{
RelFileNode rnode; /* the targeted relation */
+ ForkNumber forknum;
BlockNumber segno; /* which segment */
} PendingOperationTag;
@@ -151,15 +153,18 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */
} ExtensionBehavior;
/* local routines */
-static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
-static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
+static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum,
+ ExtensionBehavior behavior);
+static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
+ MdfdVec *seg);
static void register_unlink(RelFileNode rnode);
static MdfdVec *_fdvec_alloc(void);
-static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
- int oflags);
-static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
- bool isTemp, ExtensionBehavior behavior);
-static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
+static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
+ BlockNumber segno, int oflags);
+static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
+ BlockNumber blkno, bool isTemp, ExtensionBehavior behavior);
+static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
+ MdfdVec *seg);
/*
@@ -198,22 +203,39 @@ mdinit(void)
}
/*
+ * mdexists() -- Does the physical file for the given fork exist?
+ *
+ * Returns true when mdopen() succeeds on the fork. Note that this is also
+ * true for lingering files whose deletion is still pending.
+ */
+bool
+mdexists(SMgrRelation reln, ForkNumber forkNum)
+{
+ /*
+ * Close it first, to ensure that we notice if the fork has been
+ * unlinked since we opened it.
+ */
+ mdclose(reln, forkNum);
+
+ return (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) != NULL);
+}
+
+/*
* mdcreate() -- Create a new relation on magnetic disk.
*
* If isRedo is true, it's okay for the relation to exist already.
*/
void
-mdcreate(SMgrRelation reln, bool isRedo)
+mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
{
char *path;
File fd;
- if (isRedo && reln->md_fd != NULL)
+ if (isRedo && reln->md_fd[forkNum] != NULL)
return; /* created and opened already... */
- Assert(reln->md_fd == NULL);
+ Assert(reln->md_fd[forkNum] == NULL);
- path = relpath(reln->smgr_rnode);
+ path = relpath(reln->smgr_rnode, forkNum);
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
@@ -236,20 +258,21 @@ mdcreate(SMgrRelation reln, bool isRedo)
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not create relation %u/%u/%u: %m",
+ errmsg("could not create relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forkNum)));
}
}
pfree(path);
- reln->md_fd = _fdvec_alloc();
+ reln->md_fd[forkNum] = _fdvec_alloc();
- reln->md_fd->mdfd_vfd = fd;
- reln->md_fd->mdfd_segno = 0;
- reln->md_fd->mdfd_chain = NULL;
+ reln->md_fd[forkNum]->mdfd_vfd = fd;
+ reln->md_fd[forkNum]->mdfd_segno = 0;
+ reln->md_fd[forkNum]->mdfd_chain = NULL;
}
/*
@@ -285,7 +308,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
* we are usually not in a transaction anymore when this is called.
*/
void
-mdunlink(RelFileNode rnode, bool isRedo)
+mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
{
char *path;
int ret;
@@ -294,14 +317,14 @@ mdunlink(RelFileNode rnode, bool isRedo)
* We have to clean out any pending fsync requests for the doomed
* relation, else the next mdsync() will fail.
*/
- ForgetRelationFsyncRequests(rnode);
+ ForgetRelationFsyncRequests(rnode, forkNum);
- path = relpath(rnode);
+ path = relpath(rnode, forkNum);
/*
* Delete or truncate the first segment.
*/
- if (isRedo)
+ if (isRedo || forkNum != MAIN_FORKNUM)
ret = unlink(path);
else
{
@@ -326,10 +349,11 @@ mdunlink(RelFileNode rnode, bool isRedo)
if (!isRedo || errno != ENOENT)
ereport(WARNING,
(errcode_for_file_access(),
- errmsg("could not remove relation %u/%u/%u: %m",
+ errmsg("could not remove relation %u/%u/%u/%u: %m",
rnode.spcNode,
rnode.dbNode,
- rnode.relNode)));
+ rnode.relNode,
+ forkNum)));
}
/*
@@ -353,11 +377,12 @@ mdunlink(RelFileNode rnode, bool isRedo)
if (errno != ENOENT)
ereport(WARNING,
(errcode_for_file_access(),
- errmsg("could not remove segment %u of relation %u/%u/%u: %m",
+ errmsg("could not remove segment %u of relation %u/%u/%u/%u: %m",
segno,
rnode.spcNode,
rnode.dbNode,
- rnode.relNode)));
+ rnode.relNode,
+ forkNum)));
break;
}
}
@@ -367,7 +392,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
pfree(path);
/* Register request to unlink first segment later */
- if (!isRedo)
+ if (!isRedo && forkNum == MAIN_FORKNUM)
register_unlink(rnode);
}
@@ -381,7 +406,8 @@ mdunlink(RelFileNode rnode, bool isRedo)
* causes intervening file space to become filled with zeroes.
*/
void
-mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
+mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+ char *buffer, bool isTemp)
{
off_t seekpos;
int nbytes;
@@ -389,7 +415,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
/* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND
- Assert(blocknum >= mdnblocks(reln));
+ Assert(blocknum >= mdnblocks(reln, forknum));
#endif
/*
@@ -400,13 +426,14 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (blocknum == InvalidBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("cannot extend relation %u/%u/%u beyond %u blocks",
+ errmsg("cannot extend relation %u/%u/%u/%u beyond %u blocks",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
InvalidBlockNumber)));
- v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
+ v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
@@ -423,37 +450,40 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not extend relation %u/%u/%u: %m",
+ errmsg("could not extend relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode),
+ reln->smgr_rnode.relNode,
+ forknum),
errhint("Check free disk space.")));
/* short write: complain appropriately */
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
- errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u",
+ errmsg("could not extend relation %u/%u/%u/%u: wrote only %d of %d bytes at block %u",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nbytes, BLCKSZ, blocknum),
errhint("Check free disk space.")));
}
if (!isTemp)
- register_dirty_segment(reln, v);
+ register_dirty_segment(reln, forknum, v);
- Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
}
/*
@@ -467,17 +497,17 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* invent one out of whole cloth.
*/
static MdfdVec *
-mdopen(SMgrRelation reln, ExtensionBehavior behavior)
+mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
{
MdfdVec *mdfd;
char *path;
File fd;
/* No work if already open */
- if (reln->md_fd)
- return reln->md_fd;
+ if (reln->md_fd[forknum])
+ return reln->md_fd[forknum];
- path = relpath(reln->smgr_rnode);
+ path = relpath(reln->smgr_rnode, forknum);
fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
@@ -499,21 +529,22 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
return NULL;
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not open relation %u/%u/%u: %m",
+ errmsg("could not open relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
}
}
pfree(path);
- reln->md_fd = mdfd = _fdvec_alloc();
+ reln->md_fd[forknum] = mdfd = _fdvec_alloc();
mdfd->mdfd_vfd = fd;
mdfd->mdfd_segno = 0;
mdfd->mdfd_chain = NULL;
- Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
return mdfd;
}
@@ -522,15 +553,15 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
* mdclose() -- Close the specified relation, if it isn't closed already.
*/
void
-mdclose(SMgrRelation reln)
+mdclose(SMgrRelation reln, ForkNumber forknum)
{
- MdfdVec *v = reln->md_fd;
+ MdfdVec *v = reln->md_fd[forknum];
/* No work if already closed */
if (v == NULL)
return;
- reln->md_fd = NULL; /* prevent dangling pointer after error */
+ reln->md_fd[forknum] = NULL; /* prevent dangling pointer after error */
while (v != NULL)
{
@@ -549,13 +580,14 @@ mdclose(SMgrRelation reln)
* mdread() -- Read the specified block from a relation.
*/
void
-mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
+mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+ char *buffer)
{
off_t seekpos;
int nbytes;
MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
+ v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
@@ -563,22 +595,24 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read block %u of relation %u/%u/%u: %m",
+ errmsg("could not read block %u of relation %u/%u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
/*
* Short read: we are at or past EOF, or we read a partial block at
@@ -593,11 +627,12 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
else
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes",
+ errmsg("could not read block %u of relation %u/%u/%u/%u: read only %d of %d bytes",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nbytes, BLCKSZ)));
}
}
@@ -610,7 +645,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
* use mdextend().
*/
void
-mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
+mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+ char *buffer, bool isTemp)
{
off_t seekpos;
int nbytes;
@@ -618,10 +654,10 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
/* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND
- Assert(blocknum < mdnblocks(reln));
+ Assert(blocknum < mdnblocks(reln, forknum));
#endif
- v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
+ v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
@@ -629,36 +665,39 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not write block %u of relation %u/%u/%u: %m",
+ errmsg("could not write block %u of relation %u/%u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
/* short write: complain appropriately */
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
- errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes",
+ errmsg("could not write block %u of relation %u/%u/%u/%u: wrote only %d of %d bytes",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nbytes, BLCKSZ),
errhint("Check free disk space.")));
}
if (!isTemp)
- register_dirty_segment(reln, v);
+ register_dirty_segment(reln, forknum, v);
}
/*
@@ -670,9 +709,9 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* are present in the chain.
*/
BlockNumber
-mdnblocks(SMgrRelation reln)
+mdnblocks(SMgrRelation reln, ForkNumber forknum)
{
- MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
+ MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL);
BlockNumber nblocks;
BlockNumber segno = 0;
@@ -696,7 +735,7 @@ mdnblocks(SMgrRelation reln)
for (;;)
{
- nblocks = _mdnblocks(reln, v);
+ nblocks = _mdnblocks(reln, forknum, v);
if (nblocks > ((BlockNumber) RELSEG_SIZE))
elog(FATAL, "segment too big");
if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -715,15 +754,16 @@ mdnblocks(SMgrRelation reln)
* RELSEG_SIZE. While perhaps not strictly necessary, this keeps
* the logic simple.
*/
- v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
+ v->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT);
if (v->mdfd_chain == NULL)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not open segment %u of relation %u/%u/%u: %m",
+ errmsg("could not open segment %u of relation %u/%u/%u/%u: %m",
segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
}
v = v->mdfd_chain;
@@ -734,7 +774,8 @@ mdnblocks(SMgrRelation reln)
* mdtruncate() -- Truncate relation to specified number of blocks.
*/
void
-mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
+mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
+ bool isTemp)
{
MdfdVec *v;
BlockNumber curnblk;
@@ -744,23 +785,24 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* NOTE: mdnblocks makes sure we have opened all active segments, so that
* truncation loop will get them all!
*/
- curnblk = mdnblocks(reln);
+ curnblk = mdnblocks(reln, forknum);
if (nblocks > curnblk)
{
/* Bogus request ... but no complaint if InRecovery */
if (InRecovery)
return;
ereport(ERROR,
- (errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now",
+ (errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: it's only %u blocks now",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nblocks, curnblk)));
}
if (nblocks == curnblk)
return; /* no work */
- v = mdopen(reln, EXTENSION_FAIL);
+ v = mdopen(reln, forknum, EXTENSION_FAIL);
priorblocks = 0;
while (v != NULL)
@@ -777,15 +819,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
if (FileTruncate(v->mdfd_vfd, 0) < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+ errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nblocks)));
if (!isTemp)
- register_dirty_segment(reln, v);
+ register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain;
- Assert(ov != reln->md_fd); /* we never drop the 1st segment */
+ Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st segment */
pfree(ov);
}
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
@@ -803,13 +846,14 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+ errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
nblocks)));
if (!isTemp)
- register_dirty_segment(reln, v);
+ register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain;
ov->mdfd_chain = NULL;
}
@@ -832,7 +876,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* nothing of dirty buffers that may exist inside the buffer manager.
*/
void
-mdimmedsync(SMgrRelation reln)
+mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{
MdfdVec *v;
BlockNumber curnblk;
@@ -841,20 +885,21 @@ mdimmedsync(SMgrRelation reln)
* NOTE: mdnblocks makes sure we have opened all active segments, so that
* fsync loop will get them all!
*/
- curnblk = mdnblocks(reln);
+ curnblk = mdnblocks(reln, forknum);
- v = mdopen(reln, EXTENSION_FAIL);
+ v = mdopen(reln, forknum, EXTENSION_FAIL);
while (v != NULL)
{
if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
v->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
v = v->mdfd_chain;
}
}
@@ -1008,7 +1053,7 @@ mdsync(void)
* FileSync, since fd.c might have closed the file behind our
* back.
*/
- seg = _mdfd_getseg(reln,
+ seg = _mdfd_getseg(reln, entry->tag.forknum,
entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
false, EXTENSION_RETURN_NULL);
if (seg != NULL &&
@@ -1024,19 +1069,21 @@ mdsync(void)
failures > 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
entry->tag.segno,
entry->tag.rnode.spcNode,
entry->tag.rnode.dbNode,
- entry->tag.rnode.relNode)));
+ entry->tag.rnode.relNode,
+ entry->tag.forknum)));
else
ereport(DEBUG1,
(errcode_for_file_access(),
- errmsg("could not fsync segment %u of relation %u/%u/%u, but retrying: %m",
+ errmsg("could not fsync segment %u of relation %u/%u/%u/%u but retrying: %m",
entry->tag.segno,
entry->tag.rnode.spcNode,
entry->tag.rnode.dbNode,
- entry->tag.rnode.relNode)));
+ entry->tag.rnode.relNode,
+ entry->tag.forknum)));
/*
* Absorb incoming requests and check to see if canceled.
@@ -1126,7 +1173,7 @@ mdpostckpt(void)
Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr);
/* Unlink the file */
- path = relpath(entry->rnode);
+ path = relpath(entry->rnode, MAIN_FORKNUM);
if (unlink(path) < 0)
{
/*
@@ -1139,10 +1186,11 @@ mdpostckpt(void)
if (errno != ENOENT)
ereport(WARNING,
(errcode_for_file_access(),
- errmsg("could not remove relation %u/%u/%u: %m",
+ errmsg("could not remove relation %u/%u/%u/%u: %m",
entry->rnode.spcNode,
entry->rnode.dbNode,
- entry->rnode.relNode)));
+ entry->rnode.relNode,
+ MAIN_FORKNUM)));
}
pfree(path);
@@ -1161,26 +1209,27 @@ mdpostckpt(void)
* to be a performance problem).
*/
static void
-register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
+register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{
if (pendingOpsTable)
{
/* push it into local pending-ops table */
- RememberFsyncRequest(reln->smgr_rnode, seg->mdfd_segno);
+ RememberFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno);
}
else
{
- if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
+ if (ForwardFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno))
return; /* passed it off successfully */
if (FileSync(seg->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
}
}
@@ -1196,7 +1245,7 @@ register_unlink(RelFileNode rnode)
if (pendingOpsTable)
{
/* push it into local pending-ops table */
- RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST);
+ RememberFsyncRequest(rnode, MAIN_FORKNUM, UNLINK_RELATION_REQUEST);
}
else
{
@@ -1208,7 +1257,8 @@ register_unlink(RelFileNode rnode)
* XXX should we just leave the file orphaned instead?
*/
Assert(IsUnderPostmaster);
- while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST))
+ while (!ForwardFsyncRequest(rnode, MAIN_FORKNUM,
+ UNLINK_RELATION_REQUEST))
pg_usleep(10000L); /* 10 msec seems a good number */
}
}
@@ -1233,7 +1283,7 @@ register_unlink(RelFileNode rnode)
* structure for them.)
*/
void
-RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
+RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{
Assert(pendingOpsTable);
@@ -1246,7 +1296,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{
- if (RelFileNodeEquals(entry->tag.rnode, rnode))
+ if (RelFileNodeEquals(entry->tag.rnode, rnode) &&
+ entry->tag.forknum == forknum)
{
/* Okay, cancel this entry */
entry->canceled = true;
@@ -1313,6 +1364,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
/* ensure any pad bytes in the hash key are zeroed */
MemSet(&key, 0, sizeof(key));
key.rnode = rnode;
+ key.forknum = forknum;
key.segno = segno;
entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
@@ -1346,12 +1398,12 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
* ForgetRelationFsyncRequests -- forget any fsyncs for a rel
*/
void
-ForgetRelationFsyncRequests(RelFileNode rnode)
+ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
{
if (pendingOpsTable)
{
/* standalone backend or startup process: fsync state is local */
- RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
+ RememberFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC);
}
else if (IsUnderPostmaster)
{
@@ -1365,7 +1417,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode)
* which would be bad, so I'm inclined to assume that the bgwriter
* will always empty the queue soon.
*/
- while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
+ while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */
/*
@@ -1390,12 +1442,13 @@ ForgetDatabaseFsyncRequests(Oid dbid)
if (pendingOpsTable)
{
/* standalone backend or startup process: fsync state is local */
- RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
+ RememberFsyncRequest(rnode, InvalidForkNumber, FORGET_DATABASE_FSYNC);
}
else if (IsUnderPostmaster)
{
/* see notes in ForgetRelationFsyncRequests */
- while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
+ while (!ForwardFsyncRequest(rnode, InvalidForkNumber,
+ FORGET_DATABASE_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */
}
}
@@ -1415,14 +1468,15 @@ _fdvec_alloc(void)
* and make a MdfdVec object for it. Returns NULL on failure.
*/
static MdfdVec *
-_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
+_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
+ int oflags)
{
MdfdVec *v;
int fd;
char *path,
*fullpath;
- path = relpath(reln->smgr_rnode);
+ path = relpath(reln->smgr_rnode, forknum);
if (segno > 0)
{
@@ -1449,7 +1503,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
v->mdfd_vfd = fd;
v->mdfd_segno = segno;
v->mdfd_chain = NULL;
- Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
/* all done */
return v;
@@ -1464,10 +1518,10 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
* in the EXTENSION_CREATE case.
*/
static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
- ExtensionBehavior behavior)
+_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
+ bool isTemp, ExtensionBehavior behavior)
{
- MdfdVec *v = mdopen(reln, behavior);
+ MdfdVec *v = mdopen(reln, forknum, behavior);
BlockNumber targetseg;
BlockNumber nextsegno;
@@ -1497,20 +1551,21 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
*/
if (behavior == EXTENSION_CREATE || InRecovery)
{
- if (_mdnblocks(reln, v) < RELSEG_SIZE)
+ if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE)
{
char *zerobuf = palloc0(BLCKSZ);
- mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+ mdextend(reln, forknum,
+ nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
zerobuf, isTemp);
pfree(zerobuf);
}
- v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);
+ v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);
}
else
{
/* We won't create segment if not existent */
- v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);
+ v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0);
}
if (v->mdfd_chain == NULL)
{
@@ -1519,11 +1574,12 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
return NULL;
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",
+ errmsg("could not open segment %u of relation %u/%u/%u/%u (target block %u): %m",
nextsegno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
+ forknum,
blkno)));
}
}
@@ -1536,7 +1592,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
* Get number of blocks present in a single disk file
*/
static BlockNumber
-_mdnblocks(SMgrRelation reln, MdfdVec *seg)
+_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{
off_t len;
@@ -1544,11 +1600,12 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg)
if (len < 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
+ errmsg("could not seek to end of segment %u of relation %u/%u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ reln->smgr_rnode.relNode,
+ forknum)));
/* note that this calculation will ignore any partial block at EOF */
return (BlockNumber) (len / BLCKSZ);
}