PostgreSQL Source Code git master
md.h File Reference
#include "storage/aio_types.h"
#include "storage/block.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
Include dependency graph for md.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void mdinit (void)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
 
uint32 mdmaxcombine (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdreadv (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdstartreadv (PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdwritev (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void mdregistersync (SMgrRelation reln, ForkNumber forknum)
 
int mdfd (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
 

Function Documentation

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1597 of file md.c.

1598{
1599 SMgrRelation *srels;
1600 int i;
1601
1602 srels = palloc(sizeof(SMgrRelation) * ndelrels);
1603 for (i = 0; i < ndelrels; i++)
1604 {
1605 SMgrRelation srel = smgropen(delrels[i], INVALID_PROC_NUMBER);
1606
1607 if (isRedo)
1608 {
1609 ForkNumber fork;
1610
1611 for (fork = 0; fork <= MAX_FORKNUM; fork++)
1612 XLogDropRelation(delrels[i], fork);
1613 }
1614 srels[i] = srel;
1615 }
1616
1617 smgrdounlinkall(srels, ndelrels, isRedo);
1618
1619 for (i = 0; i < ndelrels; i++)
1620 smgrclose(srels[i]);
1621 pfree(srels);
1622}
int i
Definition: isn.c:77
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:56
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:240
void smgrclose(SMgrRelation reln)
Definition: smgr.c:374
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:538
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:630

References i, INVALID_PROC_NUMBER, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1579 of file md.c.

1580{
1581 FileTag tag;
1582 RelFileLocator rlocator;
1583
1584 rlocator.dbOid = dbid;
1585 rlocator.spcOid = 0;
1586 rlocator.relNumber = 0;
1587
1588 INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1589
1590 RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1591}
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:101
@ InvalidForkNumber
Definition: relpath.h:57
FileTag
Definition: sync.h:51
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 713 of file md.c.

714{
715 int nopensegs = reln->md_num_open_segs[forknum];
716
717 /* No work if already closed */
718 if (nopensegs == 0)
719 return;
720
721 /* close segments starting from the end */
722 while (nopensegs > 0)
723 {
724 MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
725
726 FileClose(v->mdfd_vfd);
727 _fdvec_resize(reln, forknum, nopensegs - 1);
728 nopensegs--;
729 }
730}
void FileClose(File file)
Definition: fd.c:1979
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1629
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:61
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:62
_MdfdVec
Definition: md.c:92
File mdfd_vfd
Definition: md.c:93

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 222 of file md.c.

223{
224 MdfdVec *mdfd;
225 RelPathStr path;
226 File fd;
227
228 if (isRedo && reln->md_num_open_segs[forknum] > 0)
229 return; /* created and opened already... */
230
231 Assert(reln->md_num_open_segs[forknum] == 0);
232
233 /*
234 * We may be using the target table space for the first time in this
235 * database, so create a per-database subdirectory if needed.
236 *
237 * XXX this is a fairly ugly violation of module layering, but this seems
238 * to be the best place to put the check. Maybe TablespaceCreateDbspace
239 * should be here and not in commands/tablespace.c? But that would imply
240 * importing a lot of stuff that smgr.c oughtn't know, either.
241 */
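242 TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
243 reln->smgr_rlocator.locator.dbOid,
244 isRedo);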
245
246 path = relpath(reln->smgr_rlocator, forknum);
247
248 fd = PathNameOpenFile(path.str, _mdfd_open_flags() | O_CREAT | O_EXCL);
249
250 if (fd < 0)
251 {
252 int save_errno = errno;
253
254 if (isRedo)
255 fd = PathNameOpenFile(path.str, _mdfd_open_flags());
256 if (fd < 0)
257 {
258 /* be sure to report the error reported by create, not open */
259 errno = save_errno;
260 ereport(ERROR,
261 (errcode_for_file_access(),
262 errmsg("could not create file \"%s\": %m", path.str)));
263 }
264 }
265
266 _fdvec_resize(reln, forknum, 1);
267 mdfd = &reln->md_seg_fds[forknum][0];
268 mdfd->mdfd_vfd = fd;
269 mdfd->mdfd_segno = 0;
270
271 if (!SmgrIsTemp(reln))
272 register_dirty_segment(reln, forknum, mdfd);
273}
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:112
int errcode_for_file_access(void)
Definition: elog.c:886
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1576
int File
Definition: fd.h:51
Assert(PointerIsAligned(start, uint64))
int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
Definition: md.c:1480
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1504
static int _mdfd_open_flags(void)
Definition: md.c:176
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define relpath(rlocator, forknum)
Definition: relpath.h:150
#define SmgrIsTemp(smgr)
Definition: smgr.h:74
RelFileLocator locator
char str[REL_PATH_STR_MAXLEN+1]
Definition: relpath.h:123
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:38

References _fdvec_resize(), _mdfd_open_flags(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdfd(), PathNameOpenFile(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, RelPathStr::str, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 203 of file md.c.

204{
205 /*
206 * Close it first, to ensure that we notice if the fork has been unlinked
207 * since we opened it. As an optimization, we can skip that in recovery,
208 * which already closes relations when dropping them.
209 */
210 if (!InRecovery)
211 mdclose(reln, forknum);
212
213 return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
214}
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:713
#define EXTENSION_RETURN_NULL
Definition: md.c:115
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:664
bool InRecovery
Definition: xlogutils.c:50

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 487 of file md.c.

489{
490 pgoff_t seekpos;
491 int nbytes;
492 MdfdVec *v;
493
494 /* If this build supports direct I/O, the buffer must be I/O aligned. */
495 if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
496 Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
497
498 /* This assert is too expensive to have on normally ... */
499#ifdef CHECK_WRITE_VS_EXTEND
500 Assert(blocknum >= mdnblocks(reln, forknum));
501#endif
502
503 /*
504 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
505 * more --- we mustn't create a block whose number actually is
506 * InvalidBlockNumber. (Note that this failure should be unreachable
507 * because of upstream checks in bufmgr.c.)
508 */
509 if (blocknum == InvalidBlockNumber)
510 ereport(ERROR,
511 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
512 errmsg("cannot extend file \"%s\" beyond %u blocks",
513 relpath(reln->smgr_rlocator, forknum).str,
514 InvalidBlockNumber)));
515
516 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
517
518 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
519
520 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
521
522 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
523 {
524 if (nbytes < 0)
525 ereport(ERROR,
526 (errcode_for_file_access(),
527 errmsg("could not extend file \"%s\": %m",
528 FilePathName(v->mdfd_vfd)),
529 errhint("Check free disk space.")));
530 /* short write: complain appropriately */
531 ereport(ERROR,
532 (errcode(ERRCODE_DISK_FULL),
533 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
534 FilePathName(v->mdfd_vfd),
535 nbytes, BLCKSZ, blocknum),
536 errhint("Check free disk space.")));
537 }
538
539 if (!skipFsync && !SmgrIsTemp(reln))
540 register_dirty_segment(reln, forknum, v);
541
542 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
543}
uint32 BlockNumber
Definition: block.h:31
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:806
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
char * FilePathName(File file)
Definition: fd.c:2513
static ssize_t FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition: fd.h:210
#define PG_O_DIRECT
Definition: fd.h:96
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1869
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1223
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1740
#define EXTENSION_CREATE
Definition: md.c:117
#define PG_IO_ALIGN_SIZE
#define pgoff_t
Definition: port.h:422

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, pgoff_t, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfd()

int mdfd ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
uint32 *  off 
)

Definition at line 1480 of file md.c.

1481{
1482 MdfdVec *v = mdopenfork(reln, forknum, EXTENSION_FAIL);
1483
1484 v = _mdfd_getseg(reln, forknum, blocknum, false,
1485 EXTENSION_FAIL);
1486
1487 *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1488
1489 Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1490
1491 return FileGetRawDesc(v->mdfd_vfd);
1492}
int FileGetRawDesc(File file)
Definition: fd.c:2529
#define EXTENSION_FAIL
Definition: md.c:113

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, FileGetRawDesc(), _MdfdVec::mdfd_vfd, mdopenfork(), and pgoff_t.

Referenced by mdcreate(), and mdopenfork().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1960 of file md.c.

1961{
1962 /*
1963 * For now we only use filter requests as a way to drop all scheduled
1964 * callbacks relating to a given database, when dropping the database.
1965 * We'll return true for all candidates that have the same database OID as
1966 * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1967 */
1968 return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1969}
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1427 of file md.c.

1428{
1429 int segno;
1430 int min_inactive_seg;
1431
1432 /*
1433 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1434 * the loop below will get them all!
1435 */
1436 mdnblocks(reln, forknum);
1437
1438 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1439
1440 /*
1441 * Temporarily open inactive segments, then close them after sync. There
1442 * may be some inactive segments left opened after fsync() error, but that
1443 * is harmless. We don't bother to clean them up and take a risk of
1444 * further trouble. The next mdclose() will soon close them.
1445 */
1446 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1447 segno++;
1448
1449 while (segno > 0)
1450 {
1451 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1452
1453 /*
1454 * fsyncs done through mdimmedsync() should be tracked in a separate
1455 * IOContext than those done through mdsyncfiletag() to differentiate
1456 * between unavoidable client backend fsyncs (e.g. those done during
1457 * index build) and those which ideally would have been done by the
1458 * checkpointer. Since other IO operations bypassing the buffer
1459 * manager could also be tracked in such an IOContext, wait until
1460 * these are also tracked to track immediate fsyncs.
1461 */
1462 if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1463 ereport(data_sync_elevel(ERROR),
1464 (errcode_for_file_access(),
1465 errmsg("could not fsync file \"%s\": %m",
1466 FilePathName(v->mdfd_vfd))));
1467
1468 /* Close inactive segments immediately */
1469 if (segno > min_inactive_seg)
1470 {
1471 FileClose(v->mdfd_vfd);
1472 _fdvec_resize(reln, forknum, segno - 1);
1473 }
1474
1475 segno--;
1476 }
1477}
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2349
int data_sync_elevel(int elevel)
Definition: fd.c:3998
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1697

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )

Definition at line 190 of file md.c.

191{
192 MdCxt = AllocSetContextCreate(TopMemoryContext,
193 "MdSmgr",
194 ALLOCSET_DEFAULT_SIZES);
195}
MemoryContext TopMemoryContext
Definition: mcxt.c:166
static MemoryContext MdCxt
Definition: md.c:97
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdmaxcombine()

uint32 mdmaxcombine ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 833 of file md.c.

835{
836 BlockNumber segoff;
837
838 segoff = blocknum % ((BlockNumber) RELSEG_SIZE);
839
840 return RELSEG_SIZE - segoff;
841}

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1223 of file md.c.

1224{
1225 MdfdVec *v;
1226 BlockNumber nblocks;
1227 BlockNumber segno;
1228
1229 mdopenfork(reln, forknum, EXTENSION_FAIL);
1230
1231 /* mdopen has opened the first segment */
1232 Assert(reln->md_num_open_segs[forknum] > 0);
1233
1234 /*
1235 * Start from the last open segments, to avoid redundant seeks. We have
1236 * previously verified that these segments are exactly RELSEG_SIZE long,
1237 * and it's useless to recheck that each time.
1238 *
1239 * NOTE: this assumption could only be wrong if another backend has
1240 * truncated the relation. We rely on higher code levels to handle that
1241 * scenario by closing and re-opening the md fd, which is handled via
1242 * relcache flush. (Since the checkpointer doesn't participate in
1243 * relcache flush, it could have segment entries for inactive segments;
1244 * that's OK because the checkpointer never needs to compute relation
1245 * size.)
1246 */
1247 segno = reln->md_num_open_segs[forknum] - 1;
1248 v = &reln->md_seg_fds[forknum][segno];
1249
1250 for (;;)
1251 {
1252 nblocks = _mdnblocks(reln, forknum, v);
1253 if (nblocks > ((BlockNumber) RELSEG_SIZE))
1254 elog(FATAL, "segment too big");
1255 if (nblocks < ((BlockNumber) RELSEG_SIZE))
1256 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
1257
1258 /*
1259 * If segment is exactly RELSEG_SIZE, advance to next one.
1260 */
1261 segno++;
1262
1263 /*
1264 * We used to pass O_CREAT here, but that has the disadvantage that it
1265 * might create a segment which has vanished through some operating
1266 * system misadventure. In such a case, creating the segment here
1267 * undermines _mdfd_getseg's attempts to notice and report an error
1268 * upon access to a missing segment.
1269 */
1270 v = _mdfd_openseg(reln, forknum, segno, 0);
1271 if (v == NULL)
1272 return segno * ((BlockNumber) RELSEG_SIZE);
1273 }
1274}
#define FATAL
Definition: elog.h:41
#define elog(elevel,...)
Definition: elog.h:226

References _mdfd_openseg(), _mdnblocks(), Assert(), elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdregistersync(), mdwritev(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 702 of file md.c.

703{
704 /* mark it not open */
705 for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
706 reln->md_num_open_segs[forknum] = 0;
707}

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks 
)

Definition at line 736 of file md.c.

738{
739#ifdef USE_PREFETCH
740
741 Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
742
743 if ((uint64) blocknum + nblocks > (uint64) MaxBlockNumber + 1)
744 return false;
745
746 while (nblocks > 0)
747 {
748 pgoff_t seekpos;
749 MdfdVec *v;
750 int nblocks_this_segment;
751
752 v = _mdfd_getseg(reln, forknum, blocknum, false,
753 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
754 if (v == NULL)
755 return false;
756
757 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
758
759 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
760
761 nblocks_this_segment =
762 Min(nblocks,
763 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
764
765 (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ * nblocks_this_segment,
766 WAIT_EVENT_DATA_FILE_PREFETCH);
767
768 blocknum += nblocks_this_segment;
769 nblocks -= nblocks_this_segment;
770 }
771#endif /* USE_PREFETCH */
772
773 return true;
774}
#define MaxBlockNumber
Definition: block.h:35
#define Min(x, y)
Definition: c.h:1006
uint64_t uint64
Definition: c.h:542
int io_direct_flags
Definition: fd.c:168
int FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition: fd.c:2080
#define IO_DIRECT_DATA
Definition: fd.h:54

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, MaxBlockNumber, _MdfdVec::mdfd_vfd, Min, and pgoff_t.

◆ mdreadv()

void mdreadv ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)

Definition at line 847 of file md.c.

849{
850 while (nblocks > 0)
851 {
852 struct iovec iov[PG_IOV_MAX];
853 int iovcnt;
854 pgoff_t seekpos;
855 int nbytes;
856 MdfdVec *v;
857 BlockNumber nblocks_this_segment;
858 size_t transferred_this_segment;
859 size_t size_this_segment;
860
861 v = _mdfd_getseg(reln, forknum, blocknum, false,
862 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
863
864 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
865
866 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
867
868 nblocks_this_segment =
869 Min(nblocks,
870 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
871 nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
872
873 if (nblocks_this_segment != nblocks)
874 elog(ERROR, "read crosses segment boundary");
875
876 iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
877 size_this_segment = nblocks_this_segment * BLCKSZ;
878 transferred_this_segment = 0;
879
880 /*
881 * Inner loop to continue after a short read. We'll keep going until
882 * we hit EOF rather than assuming that a short read means we hit the
883 * end.
884 */
885 for (;;)
886 {
887 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
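888 reln->smgr_rlocator.locator.spcOid,
889 reln->smgr_rlocator.locator.dbOid,
890 reln->smgr_rlocator.locator.relNumber,
891 reln->smgr_rlocator.backend);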
892 nbytes = FileReadV(v->mdfd_vfd, iov, iovcnt, seekpos,
893 WAIT_EVENT_DATA_FILE_READ);
894 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
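895 reln->smgr_rlocator.locator.spcOid,
896 reln->smgr_rlocator.locator.dbOid,
897 reln->smgr_rlocator.locator.relNumber,
898 reln->smgr_rlocator.backend,
899 nbytes,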
900 size_this_segment - transferred_this_segment);
901
902#ifdef SIMULATE_SHORT_READ
903 nbytes = Min(nbytes, 4096);
904#endif
905
906 if (nbytes < 0)
907 ereport(ERROR,
908 (errcode_for_file_access(),
909 errmsg("could not read blocks %u..%u in file \"%s\": %m",
910 blocknum,
911 blocknum + nblocks_this_segment - 1,
912 FilePathName(v->mdfd_vfd))));
913
914 if (nbytes == 0)
915 {
916 /*
917 * We are at or past EOF, or we read a partial block at EOF.
918 * Normally this is an error; upper levels should never try to
919 * read a nonexistent block. However, if zero_damaged_pages
920 * is ON or we are InRecovery, we should instead return zeroes
921 * without complaining. This allows, for example, the case of
922 * trying to update a block that was later truncated away.
923 *
924 * NB: We think that this codepath is unreachable in recovery
925 * and incomplete with zero_damaged_pages, as missing segments
926 * are not created. Putting blocks into the buffer-pool that
927 * do not exist on disk is rather problematic, as it will not
928 * be found by scans that rely on smgrnblocks(), as they are
929 * beyond EOF. It also can cause weird problems with relation
930 * extension, as relation extension does not expect blocks
931 * beyond EOF to exist.
932 *
933 * Therefore we do not want to copy the logic into
934 * mdstartreadv(), where it would have to be more complicated
935 * due to potential differences in the zero_damaged_pages
936 * setting between the definer and completor of IO.
937 *
938 * For PG 18, we are putting an Assert(false) in mdreadv()
939 * (triggering failures in assertion-enabled builds, but
940 * continuing to work in production builds). Afterwards we
941 * plan to remove this code entirely.
942 */
943 if (zero_damaged_pages || InRecovery)
944 {
945 Assert(false); /* see comment above */
946
947 for (BlockNumber i = transferred_this_segment / BLCKSZ;
948 i < nblocks_this_segment;
949 ++i)
950 memset(buffers[i], 0, BLCKSZ);
951 break;
952 }
953 else
954 ereport(ERROR,
955 (errcode(ERRCODE_DATA_CORRUPTED),
956 errmsg("could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
957 blocknum,
958 blocknum + nblocks_this_segment - 1,
959 FilePathName(v->mdfd_vfd),
960 transferred_this_segment,
961 size_this_segment)));
962 }
963
964 /* One loop should usually be enough. */
965 transferred_this_segment += nbytes;
966 Assert(transferred_this_segment <= size_this_segment);
967 if (transferred_this_segment == size_this_segment)
968 break;
969
970 /* Adjust position and vectors after a short read. */
971 seekpos += nbytes;
972 iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
973 }
974
975 nblocks -= nblocks_this_segment;
976 buffers += nblocks_this_segment;
977 blocknum += nblocks_this_segment;
978 }
979}
bool zero_damaged_pages
Definition: bufmgr.c:144
#define lengthof(array)
Definition: c.h:790
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition: fd.c:2162
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Definition: file_utils.c:614
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:119
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
Definition: md.c:784
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
#define PG_IOV_MAX
Definition: pg_iovec.h:47

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileReadV(), i, InRecovery, lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, Min, PG_IOV_MAX, pgoff_t, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and zero_damaged_pages.

◆ mdregistersync()

void mdregistersync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1376 of file md.c.

1377{
1378 int segno;
1379 int min_inactive_seg;
1380
1381 /*
1382 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1383 * the loop below will get them all!
1384 */
1385 mdnblocks(reln, forknum);
1386
1387 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1388
1389 /*
1390 * Temporarily open inactive segments, then close them after sync. There
1391 * may be some inactive segments left opened after error, but that is
1392 * harmless. We don't bother to clean them up and take a risk of further
1393 * trouble. The next mdclose() will soon close them.
1394 */
1395 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1396 segno++;
1397
1398 while (segno > 0)
1399 {
1400 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1401
1402 register_dirty_segment(reln, forknum, v);
1403
1404 /* Close inactive segments immediately */
1405 if (segno > min_inactive_seg)
1406 {
1407 FileClose(v->mdfd_vfd);
1408 _fdvec_resize(reln, forknum, segno - 1);
1409 }
1410
1411 segno--;
1412 }
1413}

References _fdvec_resize(), _mdfd_openseg(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and register_dirty_segment().

◆ mdstartreadv()

void mdstartreadv ( PgAioHandle *  ioh,
SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)

Definition at line 985 of file md.c.

988{
989 pgoff_t seekpos;
990 MdfdVec *v;
991 BlockNumber nblocks_this_segment;
992 struct iovec *iov;
993 int iovcnt;
994 int ret;
995
996 v = _mdfd_getseg(reln, forknum, blocknum, false,
997 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
998
999 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1000
1001 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1002
1003 nblocks_this_segment =
1004 Min(nblocks,
1005 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1006
1007 if (nblocks_this_segment != nblocks)
1008 elog(ERROR, "read crossing segment boundary");
1009
1010 iovcnt = pgaio_io_get_iovec(ioh, &iov);
1011
1012 Assert(nblocks <= iovcnt);
1013
1014 iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
1015
1016 Assert(iovcnt <= nblocks_this_segment);
1017
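1018 if (!(io_direct_flags & IO_DIRECT_DATA))
1019 pgaio_io_set_flag(ioh, PGAIO_HF_BUFFERED);
1020
1021 pgaio_io_set_target_smgr(ioh,
1022 reln,
1023 forknum,
1024 blocknum,
1025 nblocks,
1026 false);
1027 pgaio_io_register_callbacks(ioh, PGAIO_HCB_MD_READV, 0);
1028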
1029 ret = FileStartReadV(ioh, v->mdfd_vfd, iovcnt, seekpos, WAIT_EVENT_DATA_FILE_READ);
1030 if (ret != 0)
1031 ereport(ERROR,
1032 (errcode_for_file_access(),
1033 errmsg("could not start reading blocks %u..%u in file \"%s\": %m",
1034 blocknum,
1035 blocknum + nblocks_this_segment - 1,
1036 FilePathName(v->mdfd_vfd))));
1037
1038 /*
1039 * The error checks corresponding to the post-read checks in mdreadv() are
1040 * in md_readv_complete().
1041 *
1042 * However we chose, at least for now, to not implement the
1043 * zero_damaged_pages logic present in mdreadv(). As outlined in mdreadv()
1044 * that logic is rather problematic, and we want to get rid of it. Here
1045 * equivalent logic would have to be more complicated due to potential
1046 * differences in the zero_damaged_pages setting between the definer and
1047 * completor of IO.
1048 */
1049}
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition: aio.c:330
@ PGAIO_HCB_MD_READV
Definition: aio.h:196
@ PGAIO_HF_BUFFERED
Definition: aio.h:77
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
Definition: aio_callback.c:86
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
Definition: aio_io.c:42
int FileStartReadV(PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition: fd.c:2218
void pgaio_io_set_target_smgr(PgAioHandle *ioh, SMgrRelationData *smgr, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skip_fsync)
Definition: smgr.c:1029

References _mdfd_getseg(), Assert(), buffers_to_iovec(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileStartReadV(), IO_DIRECT_DATA, io_direct_flags, _MdfdVec::mdfd_vfd, Min, PGAIO_HCB_MD_READV, PGAIO_HF_BUFFERED, pgaio_io_get_iovec(), pgaio_io_register_callbacks(), pgaio_io_set_flag(), pgaio_io_set_target_smgr(), and pgoff_t.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1890 of file md.c.

1891{
1892 SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER);
1893 File file;
1894 instr_time io_start;
1895 bool need_to_close;
1896 int result,
1897 save_errno;
1898
1899 /* See if we already have the file open, or need to open it. */
1900 if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1901 {
1902 file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1903 strlcpy(path, FilePathName(file), MAXPGPATH);
1904 need_to_close = false;
1905 }
1906 else
1907 {
1908 MdPathStr p;
1909
1910 p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1911 strlcpy(path, p.str, MD_PATH_STR_MAXLEN);
1912
1913 file = PathNameOpenFile(path, _mdfd_open_flags());
1914 if (file < 0)
1915 return -1;
1916 need_to_close = true;
1917 }
1918
1919 io_start = pgstat_prepare_io_time(track_io_timing);
1920
1921 /* Sync the file. */
1922 result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1923 save_errno = errno;
1924
1925 if (need_to_close)
1926 FileClose(file);
1927
1928 pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1929 IOOP_FSYNC, io_start, 1, 0);
1930
1931 errno = save_errno;
1932 return result;
1933}
bool track_io_timing
Definition: bufmgr.c:147
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1677
#define MD_PATH_STR_MAXLEN
Definition: md.c:132
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:289
@ IOOP_FSYNC
Definition: pgstat.h:308
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint64 segno
Definition: sync.h:55
Definition: md.c:139
char str[MD_PATH_STR_MAXLEN+1]
Definition: md.c:140

References _mdfd_open_flags(), _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, INVALID_PROC_NUMBER, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, MD_PATH_STR_MAXLEN, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), MdPathStr::str, strlcpy(), and track_io_timing.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  curnblk,
BlockNumber  nblocks 
)

Definition at line 1287 of file md.c.

1289{
1290 BlockNumber priorblocks;
1291 int curopensegs;
1292
1293 if (nblocks > curnblk)
1294 {
1295 /* Bogus request ... but no complaint if InRecovery */
1296 if (InRecovery)
1297 return;
1298 ereport(ERROR,
1299 (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1300 relpath(reln->smgr_rlocator, forknum).str,
1301 nblocks, curnblk)));
1302 }
1303 if (nblocks == curnblk)
1304 return; /* no work */
1305
1306 /*
1307 * Truncate segments, starting at the last one. Starting at the end makes
1308 * managing the memory for the fd array easier, should there be errors.
1309 */
1310 curopensegs = reln->md_num_open_segs[forknum];
1311 while (curopensegs > 0)
1312 {
1313 MdfdVec *v;
1314
1315 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1316
1317 v = &reln->md_seg_fds[forknum][curopensegs - 1];
1318
1319 if (priorblocks > nblocks)
1320 {
1321 /*
1322 * This segment is no longer active. We truncate the file, but do
1323 * not delete it, for reasons explained in the header comments.
1324 */
1325 if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1326 ereport(ERROR,
1327 (errcode_for_file_access(),
1328 errmsg("could not truncate file \"%s\": %m",
1329 FilePathName(v->mdfd_vfd))));
1330
1331 if (!SmgrIsTemp(reln))
1332 register_dirty_segment(reln, forknum, v);
1333
1334 /* we never drop the 1st segment */
1335 Assert(v != &reln->md_seg_fds[forknum][0]);
1336
1337 FileClose(v->mdfd_vfd);
1338 _fdvec_resize(reln, forknum, curopensegs - 1);
1339 }
1340 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1341 {
1342 /*
1343 * This is the last segment we want to keep. Truncate the file to
1344 * the right length. NOTE: if nblocks is exactly a multiple K of
1345 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1346 * keep it. This adheres to the invariant given in the header
1347 * comments.
1348 */
1349 BlockNumber lastsegblocks = nblocks - priorblocks;
1350
1351 if (FileTruncate(v->mdfd_vfd, (pgoff_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1352 ereport(ERROR,
1353 (errcode_for_file_access(),
1354 errmsg("could not truncate file \"%s\" to %u blocks: %m",
1355 FilePathName(v->mdfd_vfd),
1356 nblocks)));
1357 if (!SmgrIsTemp(reln))
1358 register_dirty_segment(reln, forknum, v);
1359 }
1360 else
1361 {
1362 /*
1363 * We still need this segment, so nothing to do for this and any
1364 * earlier segment.
1365 */
1366 break;
1367 }
1368 curopensegs--;
1369 }
1370}
int FileTruncate(File file, pgoff_t offset, uint32 wait_event_info)
Definition: fd.c:2478

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, pgoff_t, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 337 of file md.c.

338{
339 /* Now do the per-fork work */
340 if (forknum == InvalidForkNumber)
341 {
342 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
343 mdunlinkfork(rlocator, forknum, isRedo);
344 }
345 else
346 mdunlinkfork(rlocator, forknum, isRedo);
347}
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:374

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag * ftag,
char *  path 
)

Definition at line 1942 of file md.c.

1943{
1944 RelPathStr p;
1945
1946 /* Compute the path. */
1947 p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1948 strlcpy(path, p.str, MAXPGPATH);
1949
1950 /* Try to unlink the file. */
1951 return unlink(path);
1952}
@ MAIN_FORKNUM
Definition: relpath.h:58
#define relpathperm(rlocator, forknum)
Definition: relpath.h:146

References MAIN_FORKNUM, MAXPGPATH, relpathperm, FileTag::rlocator, RelPathStr::str, and strlcpy().

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 1164 of file md.c.

1166{
1167 Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
1168
1169 /*
1170 * Issue flush requests in as few requests as possible; have to split at
1171 * segment boundaries though, since those are actually separate files.
1172 */
1173 while (nblocks > 0)
1174 {
1175 BlockNumber nflush = nblocks;
1176 pgoff_t seekpos;
1177 MdfdVec *v;
1178 int segnum_start,
1179 segnum_end;
1180
1181 v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
1182 EXTENSION_DONT_OPEN);
1183
1184 /*
1185 * We might be flushing buffers of already removed relations, that's
1186 * ok, just ignore that case. If the segment file wasn't open already
1187 * (ie from a recent mdwrite()), then we don't want to re-open it, to
1188 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
1189 * us with a descriptor to a file that is about to be unlinked.
1190 */
1191 if (!v)
1192 return;
1193
1194 /* compute offset inside the current segment */
1195 segnum_start = blocknum / RELSEG_SIZE;
1196
1197 /* compute number of desired writes within the current segment */
1198 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1199 if (segnum_start != segnum_end)
1200 nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
1201
1202 Assert(nflush >= 1);
1203 Assert(nflush <= nblocks);
1204
1205 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1206
1207 FileWriteback(v->mdfd_vfd, seekpos, (pgoff_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
1208
1209 nblocks -= nflush;
1210 blocknum += nflush;
1211 }
1212}
void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info)
Definition: fd.c:2136
#define EXTENSION_DONT_OPEN
Definition: md.c:121

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), IO_DIRECT_DATA, io_direct_flags, _MdfdVec::mdfd_vfd, and pgoff_t.

◆ mdwritev()

void mdwritev ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void **  buffers,
BlockNumber  nblocks,
bool  skipFsync 
)

Definition at line 1059 of file md.c.

1061{
1062 /* This assert is too expensive to have on normally ... */
1063#ifdef CHECK_WRITE_VS_EXTEND
1064 Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum));
1065#endif
1066
1067 while (nblocks > 0)
1068 {
1069 struct iovec iov[PG_IOV_MAX];
1070 int iovcnt;
1071 pgoff_t seekpos;
1072 int nbytes;
1073 MdfdVec *v;
1074 BlockNumber nblocks_this_segment;
1075 size_t transferred_this_segment;
1076 size_t size_this_segment;
1077
1078 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
1079 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
1080
1081 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1082
1083 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1084
1085 nblocks_this_segment =
1086 Min(nblocks,
1087 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1088 nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
1089
1090 if (nblocks_this_segment != nblocks)
1091 elog(ERROR, "write crosses segment boundary");
1092
1093 iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
1094 size_this_segment = nblocks_this_segment * BLCKSZ;
1095 transferred_this_segment = 0;
1096
1097 /*
1098 * Inner loop to continue after a short write. If the reason is that
1099 * we're out of disk space, a future attempt should get an ENOSPC
1100 * error from the kernel.
1101 */
1102 for (;;)
1103 {
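1104 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
1105 reln->smgr_rlocator.locator.spcOid,
1106 reln->smgr_rlocator.locator.dbOid,
1107 reln->smgr_rlocator.locator.relNumber,
1108 reln->smgr_rlocator.backend);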
1109 nbytes = FileWriteV(v->mdfd_vfd, iov, iovcnt, seekpos,
1110 WAIT_EVENT_DATA_FILE_WRITE);
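1111 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1112 reln->smgr_rlocator.locator.spcOid,
1113 reln->smgr_rlocator.locator.dbOid,
1114 reln->smgr_rlocator.locator.relNumber,
1115 reln->smgr_rlocator.backend,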
1116 nbytes,
1117 size_this_segment - transferred_this_segment);
1118
1119#ifdef SIMULATE_SHORT_WRITE
1120 nbytes = Min(nbytes, 4096);
1121#endif
1122
1123 if (nbytes < 0)
1124 {
1125 bool enospc = errno == ENOSPC;
1126
1127 ereport(ERROR,
1128 (errcode_for_file_access(),
1129 errmsg("could not write blocks %u..%u in file \"%s\": %m",
1130 blocknum,
1131 blocknum + nblocks_this_segment - 1,
1132 FilePathName(v->mdfd_vfd)),
1133 enospc ? errhint("Check free disk space.") : 0));
1134 }
1135
1136 /* One loop should usually be enough. */
1137 transferred_this_segment += nbytes;
1138 Assert(transferred_this_segment <= size_this_segment);
1139 if (transferred_this_segment == size_this_segment)
1140 break;
1141
1142 /* Adjust position and iovecs after a short write. */
1143 seekpos += nbytes;
1144 iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
1145 }
1146
1147 if (!skipFsync && !SmgrIsTemp(reln))
1148 register_dirty_segment(reln, forknum, v);
1149
1150 nblocks -= nblocks_this_segment;
1151 buffers += nblocks_this_segment;
1152 blocknum += nblocks_this_segment;
1153 }
1154}
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition: fd.c:2244

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, elog, ereport, errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWriteV(), lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), Min, PG_IOV_MAX, pgoff_t, register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and RelFileLocator::spcOid.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)

Definition at line 552 of file md.c.

554{
555 MdfdVec *v;
556 BlockNumber curblocknum = blocknum;
557 int remblocks = nblocks;
558
559 Assert(nblocks > 0);
560
561 /* This assert is too expensive to have on normally ... */
562#ifdef CHECK_WRITE_VS_EXTEND
563 Assert(blocknum >= mdnblocks(reln, forknum));
564#endif
565
566 /*
567 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
568 * more --- we mustn't create a block whose number actually is
569 * InvalidBlockNumber or larger.
570 */
571 if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
572 ereport(ERROR,
573 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
574 errmsg("cannot extend file \"%s\" beyond %u blocks",
575 relpath(reln->smgr_rlocator, forknum).str,
576 InvalidBlockNumber)));
577
578 while (remblocks > 0)
579 {
580 BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
581 pgoff_t seekpos = (pgoff_t) BLCKSZ * segstartblock;
582 int numblocks;
583
584 if (segstartblock + remblocks > RELSEG_SIZE)
585 numblocks = RELSEG_SIZE - segstartblock;
586 else
587 numblocks = remblocks;
588
589 v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
590
591 Assert(segstartblock < RELSEG_SIZE);
592 Assert(segstartblock + numblocks <= RELSEG_SIZE);
593
594 /*
595 * If available and useful, use posix_fallocate() (via
596 * FileFallocate()) to extend the relation. That's often more
597 * efficient than using write(), as it commonly won't cause the kernel
598 * to allocate page cache space for the extended pages.
599 *
600 * However, we don't use FileFallocate() for small extensions, as it
601 * defeats delayed allocation on some filesystems. Not clear where
602 * that decision should be made though? For now just use a cutoff of
603 * 8, anything between 4 and 8 worked OK in some local testing.
604 */
605 if (numblocks > 8)
606 {
607 int ret;
608
609 ret = FileFallocate(v->mdfd_vfd,
610 seekpos, (pgoff_t) BLCKSZ * numblocks,
611 WAIT_EVENT_DATA_FILE_EXTEND);
612 if (ret != 0)
613 {
614 ereport(ERROR,
615 errcode_for_file_access(),
616 errmsg("could not extend file \"%s\" with FileFallocate(): %m",
617 FilePathName(v->mdfd_vfd)),
618 errhint("Check free disk space."));
619 }
620 }
621 else
622 {
623 int ret;
624
625 /*
626 * Even if we don't want to use fallocate, we can still extend a
627 * bit more efficiently than writing each 8kB block individually.
628 * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
629 * to avoid multiple writes or needing a zeroed buffer for the
630 * whole length of the extension.
631 */
632 ret = FileZero(v->mdfd_vfd,
633 seekpos, (pgoff_t) BLCKSZ * numblocks,
634 WAIT_EVENT_DATA_FILE_EXTEND);
635 if (ret < 0)
636 ereport(ERROR,
637 errcode_for_file_access(),
638 errmsg("could not extend file \"%s\": %m",
639 FilePathName(v->mdfd_vfd)),
640 errhint("Check free disk space."));
641 }
642
643 if (!skipFsync && !SmgrIsTemp(reln))
644 register_dirty_segment(reln, forknum, v);
645
646 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
647
648 remblocks -= numblocks;
649 curblocknum += numblocks;
650 }
651}
int FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition: fd.c:2421
int FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition: fd.c:2376

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), pgoff_t, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

Variable Documentation

◆ aio_md_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
extern

Definition at line 169 of file md.c.