PostgreSQL Source Code git master
Loading...
Searching...
No Matches
md.h File Reference
#include "storage/aio_types.h"
#include "storage/block.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
Include dependency graph for md.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void mdinit (void)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
 
uint32 mdmaxcombine (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdreadv (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdstartreadv (PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdwritev (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void mdregistersync (SMgrRelation reln, ForkNumber forknum)
 
int mdfd (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
 

Function Documentation

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)
extern

Definition at line 1612 of file md.c.

1613{
1615 int i;
1616
1618 for (i = 0; i < ndelrels; i++)
1619 {
1621
1622 if (isRedo)
1623 {
1625
1626 for (fork = 0; fork <= MAX_FORKNUM; fork++)
1628 }
1629 srels[i] = srel;
1630 }
1631
1633
1634 for (i = 0; i < ndelrels; i++)
1635 smgrclose(srels[i]);
1636 pfree(srels);
1637}
#define palloc_array(type, count)
Definition fe_memutils.h:76
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
static int fb(int x)
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
ForkNumber
Definition relpath.h:56
#define MAX_FORKNUM
Definition relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
void smgrclose(SMgrRelation reln)
Definition smgr.c:374
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition smgr.c:538
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition xlogutils.c:630

References fb(), i, INVALID_PROC_NUMBER, MAX_FORKNUM, palloc_array, pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)
extern

Definition at line 1594 of file md.c.

1595{
1596 FileTag tag;
1597 RelFileLocator rlocator;
1598
1599 rlocator.dbOid = dbid;
1600 rlocator.spcOid = 0;
1601 rlocator.relNumber = 0;
1602
1604
1605 RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1606}
#define InvalidBlockNumber
Definition block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition md.c:102
@ InvalidForkNumber
Definition relpath.h:57
Definition sync.h:51
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition sync.c:581
@ SYNC_FILTER_REQUEST
Definition sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 725 of file md.c.

726{
727 int nopensegs = reln->md_num_open_segs[forknum];
728
729 /* No work if already closed */
730 if (nopensegs == 0)
731 return;
732
733 /* close segments starting from the end */
734 while (nopensegs > 0)
735 {
736 MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
737
739 _fdvec_resize(reln, forknum, nopensegs - 1);
740 nopensegs--;
741 }
742}
void FileClose(File file)
Definition fd.c:1966
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition md.c:1644
Definition md.c:93
File mdfd_vfd
Definition md.c:94

References _fdvec_resize(), fb(), FileClose(), and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)
extern

Definition at line 223 of file md.c.

224{
225 MdfdVec *mdfd;
226 RelPathStr path;
227 File fd;
228
229 if (isRedo && reln->md_num_open_segs[forknum] > 0)
230 return; /* created and opened already... */
231
232 Assert(reln->md_num_open_segs[forknum] == 0);
233
234 /*
235 * We may be using the target table space for the first time in this
236 * database, so create a per-database subdirectory if needed.
237 *
238 * XXX this is a fairly ugly violation of module layering, but this seems
239 * to be the best place to put the check. Maybe TablespaceCreateDbspace
240 * should be here and not in commands/tablespace.c? But that would imply
241 * importing a lot of stuff that smgr.c oughtn't know, either.
242 */
243 TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
244 reln->smgr_rlocator.locator.dbOid,
245 isRedo);
246
247 path = relpath(reln->smgr_rlocator, forknum);
248
250
251 if (fd < 0)
252 {
253 int save_errno = errno;
254
255 if (isRedo)
257 if (fd < 0)
258 {
259 /* be sure to report the error reported by create, not open */
263 errmsg("could not create file \"%s\": %m", path.str)));
264 }
265 }
266
267 _fdvec_resize(reln, forknum, 1);
268 mdfd = &reln->md_seg_fds[forknum][0];
269 mdfd->mdfd_vfd = fd;
270 mdfd->mdfd_segno = 0;
271
272 if (!SmgrIsTemp(reln))
274}
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition tablespace.c:114
#define Assert(condition)
Definition c.h:945
int errcode_for_file_access(void)
Definition elog.c:897
#define ERROR
Definition elog.h:39
#define ereport(elevel,...)
Definition elog.h:150
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1563
int File
Definition fd.h:51
int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
Definition md.c:1495
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition md.c:1519
static int _mdfd_open_flags(void)
Definition md.c:177
static char * errmsg
static int fd(const char *x, int i)
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define SmgrIsTemp(smgr)
Definition smgr.h:74
char str[REL_PATH_STR_MAXLEN+1]
Definition relpath.h:123

References _fdvec_resize(), _mdfd_open_flags(), Assert, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), mdfd(), PathNameOpenFile(), register_dirty_segment(), relpath, SmgrIsTemp, RelPathStr::str, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 204 of file md.c.

205{
206 /*
207 * Close it first, to ensure that we notice if the fork has been unlinked
208 * since we opened it. As an optimization, we can skip that in recovery,
209 * which already closes relations when dropping them.
210 */
211 if (!InRecovery)
212 mdclose(reln, forknum);
213
214 return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
215}
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition md.c:725
#define EXTENSION_RETURN_NULL
Definition md.c:116
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition md.c:676
bool InRecovery
Definition xlogutils.c:50

References EXTENSION_RETURN_NULL, fb(), InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)
extern

Definition at line 488 of file md.c.

490{
491 pgoff_t seekpos;
492 int nbytes;
493 MdfdVec *v;
494
495 /* If this build supports direct I/O, the buffer must be I/O aligned. */
496 if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
497 Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
498
499 /* This assert is too expensive to have on normally ... */
500#ifdef CHECK_WRITE_VS_EXTEND
501 Assert(blocknum >= mdnblocks(reln, forknum));
502#endif
503
504 /*
505 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
506 * more --- we mustn't create a block whose number actually is
507 * InvalidBlockNumber. (Note that this failure should be unreachable
508 * because of upstream checks in bufmgr.c.)
509 */
510 if (blocknum == InvalidBlockNumber)
513 errmsg("cannot extend file \"%s\" beyond %u blocks",
514 relpath(reln->smgr_rlocator, forknum).str,
516
517 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
518
519 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
520
521 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
522
523 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
524 {
525 if (nbytes < 0)
528 errmsg("could not extend file \"%s\": %m",
530 errhint("Check free disk space.")));
531 /* short write: complain appropriately */
534 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
536 nbytes, BLCKSZ, blocknum),
537 errhint("Check free disk space.")));
538 }
539
540 if (!skipFsync && !SmgrIsTemp(reln))
541 register_dirty_segment(reln, forknum, v);
542
543 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
544}
uint32 BlockNumber
Definition block.h:31
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:891
int errcode(int sqlerrcode)
Definition elog.c:874
int errhint(const char *fmt,...) pg_attribute_printf(1
char * FilePathName(File file)
Definition fd.c:2500
static ssize_t FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:237
#define PG_O_DIRECT
Definition fd.h:123
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition md.c:1884
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition md.c:1235
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition md.c:1755
#define EXTENSION_CREATE
Definition md.c:118
#define PG_IO_ALIGN_SIZE
off_t pgoff_t
Definition port.h:421

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg, ERROR, EXTENSION_CREATE, fb(), FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfd()

int mdfd ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
uint32 *  off 
)
extern

Definition at line 1495 of file md.c.

1496{
1497 MdfdVec *v = mdopenfork(reln, forknum, EXTENSION_FAIL);
1498
1499 v = _mdfd_getseg(reln, forknum, blocknum, false,
1501
1502 *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1503
1504 Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1505
1506 return FileGetRawDesc(v->mdfd_vfd);
1507}
int FileGetRawDesc(File file)
Definition fd.c:2516
#define EXTENSION_FAIL
Definition md.c:114

References _mdfd_getseg(), Assert, EXTENSION_FAIL, fb(), FileGetRawDesc(), _MdfdVec::mdfd_vfd, and mdopenfork().

Referenced by mdcreate(), and mdopenfork().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)
extern

Definition at line 1975 of file md.c.

1976{
1977 /*
1978 * For now we only use filter requests as a way to drop all scheduled
1979 * callbacks relating to a given database, when dropping the database.
1980 * We'll return true for all candidates that have the same database OID as
1981 * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1982 */
1983 return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1984}
RelFileLocator rlocator
Definition sync.h:54

References RelFileLocator::dbOid, fb(), and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1442 of file md.c.

1443{
1444 int segno;
1445 int min_inactive_seg;
1446
1447 /*
1448 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1449 * the loop below will get them all!
1450 */
1451 mdnblocks(reln, forknum);
1452
1453 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1454
1455 /*
1456 * Temporarily open inactive segments, then close them after sync. There
1457 * may be some inactive segments left opened after fsync() error, but that
1458 * is harmless. We don't bother to clean them up and take a risk of
1459 * further trouble. The next mdclose() will soon close them.
1460 */
1461 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1462 segno++;
1463
1464 while (segno > 0)
1465 {
1466 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1467
1468 /*
1469 * fsyncs done through mdimmedsync() should be tracked in a separate
1470 * IOContext than those done through mdsyncfiletag() to differentiate
1471 * between unavoidable client backend fsyncs (e.g. those done during
1472 * index build) and those which ideally would have been done by the
1473 * checkpointer. Since other IO operations bypassing the buffer
1474 * manager could also be tracked in such an IOContext, wait until
1475 * these are also tracked to track immediate fsyncs.
1476 */
1480 errmsg("could not fsync file \"%s\": %m",
1481 FilePathName(v->mdfd_vfd))));
1482
1483 /* Close inactive segments immediately */
1484 if (segno > min_inactive_seg)
1485 {
1486 FileClose(v->mdfd_vfd);
1487 _fdvec_resize(reln, forknum, segno - 1);
1488 }
1489
1490 segno--;
1491 }
1492}
int FileSync(File file, uint32 wait_event_info)
Definition fd.c:2336
int data_sync_elevel(int elevel)
Definition fd.c:3986
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition md.c:1712

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), FileClose(), FilePathName(), FileSync(), _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )
extern

Definition at line 191 of file md.c.

192{
194 "MdSmgr",
196}
MemoryContext TopMemoryContext
Definition mcxt.c:166
static MemoryContext MdCxt
Definition md.c:98
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdmaxcombine()

uint32 mdmaxcombine ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)
extern

Definition at line 845 of file md.c.

847{
848 BlockNumber segoff;
849
850 segoff = blocknum % ((BlockNumber) RELSEG_SIZE);
851
852 return RELSEG_SIZE - segoff;
853}

References fb().

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1235 of file md.c.

1236{
1237 MdfdVec *v;
1238 BlockNumber nblocks;
1239 BlockNumber segno;
1240
1241 mdopenfork(reln, forknum, EXTENSION_FAIL);
1242
1243 /* mdopen has opened the first segment */
1244 Assert(reln->md_num_open_segs[forknum] > 0);
1245
1246 /*
1247 * Start from the last open segments, to avoid redundant seeks. We have
1248 * previously verified that these segments are exactly RELSEG_SIZE long,
1249 * and it's useless to recheck that each time.
1250 *
1251 * NOTE: this assumption could only be wrong if another backend has
1252 * truncated the relation. We rely on higher code levels to handle that
1253 * scenario by closing and re-opening the md fd, which is handled via
1254 * relcache flush. (Since the checkpointer doesn't participate in
1255 * relcache flush, it could have segment entries for inactive segments;
1256 * that's OK because the checkpointer never needs to compute relation
1257 * size.)
1258 */
1259 segno = reln->md_num_open_segs[forknum] - 1;
1260 v = &reln->md_seg_fds[forknum][segno];
1261
1262 for (;;)
1263 {
1264 nblocks = _mdnblocks(reln, forknum, v);
1265 if (nblocks > ((BlockNumber) RELSEG_SIZE))
1266 elog(FATAL, "segment too big");
1267 if (nblocks < ((BlockNumber) RELSEG_SIZE))
1268 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
1269
1270 /*
1271 * If segment is exactly RELSEG_SIZE, advance to next one.
1272 */
1273 segno++;
1274
1275 /*
1276 * We used to pass O_CREAT here, but that has the disadvantage that it
1277 * might create a segment which has vanished through some operating
1278 * system misadventure. In such a case, creating the segment here
1279 * undermines _mdfd_getseg's attempts to notice and report an error
1280 * upon access to a missing segment.
1281 */
1282 v = _mdfd_openseg(reln, forknum, segno, 0);
1283 if (v == NULL)
1284 return segno * ((BlockNumber) RELSEG_SIZE);
1285 }
1286}
#define FATAL
Definition elog.h:41
#define elog(elevel,...)
Definition elog.h:226

References _mdfd_openseg(), _mdnblocks(), Assert, elog, EXTENSION_FAIL, FATAL, fb(), and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdregistersync(), mdwritev(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)
extern

Definition at line 714 of file md.c.

715{
716 /* mark it not open */
717 for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
718 reln->md_num_open_segs[forknum] = 0;
719}

References fb(), and MAX_FORKNUM.

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks 
)
extern

Definition at line 748 of file md.c.

750{
751#ifdef USE_PREFETCH
752
754
755 if ((uint64) blocknum + nblocks > (uint64) MaxBlockNumber + 1)
756 return false;
757
758 while (nblocks > 0)
759 {
760 pgoff_t seekpos;
761 MdfdVec *v;
763
764 v = _mdfd_getseg(reln, forknum, blocknum, false,
766 if (v == NULL)
767 return false;
768
769 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
770
771 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
772
774 Min(nblocks,
775 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
776
779
780 blocknum += nblocks_this_segment;
781 nblocks -= nblocks_this_segment;
782 }
783#endif /* USE_PREFETCH */
784
785 return true;
786}
#define MaxBlockNumber
Definition block.h:35
#define Min(x, y)
Definition c.h:1093
uint64_t uint64
Definition c.h:619
int io_direct_flags
Definition fd.c:172
int FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2067
#define IO_DIRECT_DATA
Definition fd.h:54

References _mdfd_getseg(), Assert, EXTENSION_FAIL, EXTENSION_RETURN_NULL, fb(), FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, MaxBlockNumber, _MdfdVec::mdfd_vfd, and Min.

◆ mdreadv()

void mdreadv ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)
extern

Definition at line 859 of file md.c.

861{
862 while (nblocks > 0)
863 {
864 struct iovec iov[PG_IOV_MAX];
865 int iovcnt;
866 pgoff_t seekpos;
867 int nbytes;
868 MdfdVec *v;
871 size_t size_this_segment;
872
873 v = _mdfd_getseg(reln, forknum, blocknum, false,
875
876 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
877
878 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
879
881 Min(nblocks,
882 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
884
885 if (nblocks_this_segment != nblocks)
886 elog(ERROR, "read crosses segment boundary");
887
891
892 /*
893 * Inner loop to continue after a short read. We'll keep going until
894 * we hit EOF rather than assuming that a short read means we hit the
895 * end.
896 */
897 for (;;)
898 {
899 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
900 reln->smgr_rlocator.locator.spcOid,
901 reln->smgr_rlocator.locator.dbOid,
902 reln->smgr_rlocator.locator.relNumber,
903 reln->smgr_rlocator.backend);
904 nbytes = FileReadV(v->mdfd_vfd, iov, iovcnt, seekpos,
906 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
907 reln->smgr_rlocator.locator.spcOid,
908 reln->smgr_rlocator.locator.dbOid,
909 reln->smgr_rlocator.locator.relNumber,
910 reln->smgr_rlocator.backend,
911 nbytes,
913
914#ifdef SIMULATE_SHORT_READ
915 nbytes = Min(nbytes, 4096);
916#endif
917
918 if (nbytes < 0)
921 errmsg("could not read blocks %u..%u in file \"%s\": %m",
922 blocknum,
923 blocknum + nblocks_this_segment - 1,
924 FilePathName(v->mdfd_vfd))));
925
926 if (nbytes == 0)
927 {
928 /*
929 * We are at or past EOF, or we read a partial block at EOF.
930 * Normally this is an error; upper levels should never try to
931 * read a nonexistent block. However, if zero_damaged_pages
932 * is ON or we are InRecovery, we should instead return zeroes
933 * without complaining. This allows, for example, the case of
934 * trying to update a block that was later truncated away.
935 *
936 * NB: We think that this codepath is unreachable in recovery
937 * and incomplete with zero_damaged_pages, as missing segments
938 * are not created. Putting blocks into the buffer-pool that
939 * do not exist on disk is rather problematic, as it will not
940 * be found by scans that rely on smgrnblocks(), as they are
941 * beyond EOF. It also can cause weird problems with relation
942 * extension, as relation extension does not expect blocks
943 * beyond EOF to exist.
944 *
945 * Therefore we do not want to copy the logic into
946 * mdstartreadv(), where it would have to be more complicated
947 * due to potential differences in the zero_damaged_pages
948 * setting between the definer and completor of IO.
949 *
950 * For PG 18, we are putting an Assert(false) in mdreadv()
951 * (triggering failures in assertion-enabled builds, but
952 * continuing to work in production builds). Afterwards we
953 * plan to remove this code entirely.
954 */
956 {
957 Assert(false); /* see comment above */
958
961 ++i)
962 memset(buffers[i], 0, BLCKSZ);
963 break;
964 }
965 else
968 errmsg("could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
969 blocknum,
970 blocknum + nblocks_this_segment - 1,
974 }
975
976 /* One loop should usually be enough. */
977 transferred_this_segment += nbytes;
980 break;
981
982 /* Adjust position and vectors after a short read. */
983 seekpos += nbytes;
985 }
986
987 nblocks -= nblocks_this_segment;
988 buffers += nblocks_this_segment;
989 blocknum += nblocks_this_segment;
990 }
991}
bool zero_damaged_pages
Definition bufmgr.c:189
#define lengthof(array)
Definition c.h:875
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2149
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Definition file_utils.c:614
#define EXTENSION_CREATE_RECOVERY
Definition md.c:120
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
Definition md.c:796
#define ERRCODE_DATA_CORRUPTED
#define PG_IOV_MAX
Definition pg_iovec.h:47

References _mdfd_getseg(), Assert, buffers_to_iovec(), compute_remaining_iovec(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg, ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileReadV(), i, InRecovery, lengthof, _MdfdVec::mdfd_vfd, Min, PG_IOV_MAX, and zero_damaged_pages.

◆ mdregistersync()

void mdregistersync ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1391 of file md.c.

1392{
1393 int segno;
1394 int min_inactive_seg;
1395
1396 /*
1397 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1398 * the loop below will get them all!
1399 */
1400 mdnblocks(reln, forknum);
1401
1402 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1403
1404 /*
1405 * Temporarily open inactive segments, then close them after sync. There
1406 * may be some inactive segments left opened after error, but that is
1407 * harmless. We don't bother to clean them up and take a risk of further
1408 * trouble. The next mdclose() will soon close them.
1409 */
1410 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1411 segno++;
1412
1413 while (segno > 0)
1414 {
1415 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1416
1417 register_dirty_segment(reln, forknum, v);
1418
1419 /* Close inactive segments immediately */
1420 if (segno > min_inactive_seg)
1421 {
1422 FileClose(v->mdfd_vfd);
1423 _fdvec_resize(reln, forknum, segno - 1);
1424 }
1425
1426 segno--;
1427 }
1428}

References _fdvec_resize(), _mdfd_openseg(), fb(), FileClose(), _MdfdVec::mdfd_vfd, mdnblocks(), and register_dirty_segment().

◆ mdstartreadv()

void mdstartreadv ( PgAioHandle *  ioh,
SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)
extern

Definition at line 997 of file md.c.

1000{
1001 pgoff_t seekpos;
1002 MdfdVec *v;
1004 struct iovec *iov;
1005 int iovcnt;
1006 int ret;
1007
1008 v = _mdfd_getseg(reln, forknum, blocknum, false,
1010
1011 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1012
1013 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1014
1016 Min(nblocks,
1017 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1018
1019 if (nblocks_this_segment != nblocks)
1020 elog(ERROR, "read crossing segment boundary");
1021
1023
1024 Assert(nblocks <= iovcnt);
1025
1027
1029
1032
1034 reln,
1035 forknum,
1036 blocknum,
1037 nblocks,
1038 false);
1040
1042 if (ret != 0)
1043 ereport(ERROR,
1045 errmsg("could not start reading blocks %u..%u in file \"%s\": %m",
1046 blocknum,
1047 blocknum + nblocks_this_segment - 1,
1048 FilePathName(v->mdfd_vfd))));
1049
1050 /*
1051 * The error checks corresponding to the post-read checks in mdreadv() are
1052 * in md_readv_complete().
1053 *
1054 * However we chose, at least for now, to not implement the
1055 * zero_damaged_pages logic present in mdreadv(). As outlined in mdreadv()
1056 * that logic is rather problematic, and we want to get rid of it. Here
1057 * equivalent logic would have to be more complicated due to potential
1058 * differences in the zero_damaged_pages setting between the definer and
1059 * completor of IO.
1060 */
1061}
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition aio.c:330
@ PGAIO_HCB_MD_READV
Definition aio.h:196
@ PGAIO_HF_BUFFERED
Definition aio.h:77
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
Definition aio_io.c:42
int FileStartReadV(PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2205
void pgaio_io_set_target_smgr(PgAioHandle *ioh, SMgrRelationData *smgr, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skip_fsync)
Definition smgr.c:1038

References _mdfd_getseg(), Assert, buffers_to_iovec(), elog, ereport, errcode_for_file_access(), errmsg, ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileStartReadV(), IO_DIRECT_DATA, io_direct_flags, _MdfdVec::mdfd_vfd, Min, PGAIO_HCB_MD_READV, PGAIO_HF_BUFFERED, pgaio_io_get_iovec(), pgaio_io_register_callbacks(), pgaio_io_set_flag(), and pgaio_io_set_target_smgr().

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)
extern

Definition at line 1905 of file md.c.

1906{
1908 File file;
1910 bool need_to_close;
1911 int result,
1912 save_errno;
1913
1914 /* See if we already have the file open, or need to open it. */
1915 if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1916 {
1917 file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1918 strlcpy(path, FilePathName(file), MAXPGPATH);
1919 need_to_close = false;
1920 }
1921 else
1922 {
1923 MdPathStr p;
1924
1925 p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1926 strlcpy(path, p.str, MD_PATH_STR_MAXLEN);
1927
1928 file = PathNameOpenFile(path, _mdfd_open_flags());
1929 if (file < 0)
1930 return -1;
1931 need_to_close = true;
1932 }
1933
1935
1936 /* Sync the file. */
1937 result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1938 save_errno = errno;
1939
1940 if (need_to_close)
1941 FileClose(file);
1942
1944 IOOP_FSYNC, io_start, 1, 0);
1945
1946 errno = save_errno;
1947 return result;
1948}
bool track_io_timing
Definition bufmgr.c:192
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition md.c:1692
#define MD_PATH_STR_MAXLEN
Definition md.c:133
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition pgstat.h:281
@ IOCONTEXT_NORMAL
Definition pgstat.h:293
@ IOOP_FSYNC
Definition pgstat.h:312
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
int16 forknum
Definition sync.h:53
uint64 segno
Definition sync.h:55
char str[MD_PATH_STR_MAXLEN+1]
Definition md.c:141

References _mdfd_open_flags(), _mdfd_segpath(), fb(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, INVALID_PROC_NUMBER, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, MD_PATH_STR_MAXLEN, PathNameOpenFile(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), MdPathStr::str, strlcpy(), and track_io_timing.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  curnblk,
BlockNumber  nblocks 
)
extern

Definition at line 1302 of file md.c.

1304{
1306 int curopensegs;
1307
1308 if (nblocks > curnblk)
1309 {
1310 /* Bogus request ... but no complaint if InRecovery */
1311 if (InRecovery)
1312 return;
1313 ereport(ERROR,
1314 (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1315 relpath(reln->smgr_rlocator, forknum).str,
1316 nblocks, curnblk)));
1317 }
1318 if (nblocks == curnblk)
1319 return; /* no work */
1320
1321 /*
1322 * Truncate segments, starting at the last one. Starting at the end makes
1323 * managing the memory for the fd array easier, should there be errors.
1324 */
1325 curopensegs = reln->md_num_open_segs[forknum];
1326 while (curopensegs > 0)
1327 {
1328 MdfdVec *v;
1329
1331
1332 v = &reln->md_seg_fds[forknum][curopensegs - 1];
1333
1334 if (priorblocks > nblocks)
1335 {
1336 /*
1337 * This segment is no longer active. We truncate the file, but do
1338 * not delete it, for reasons explained in the header comments.
1339 */
1341 ereport(ERROR,
1343 errmsg("could not truncate file \"%s\": %m",
1344 FilePathName(v->mdfd_vfd))));
1345
1346 if (!SmgrIsTemp(reln))
1347 register_dirty_segment(reln, forknum, v);
1348
1349 /* we never drop the 1st segment */
1350 Assert(v != &reln->md_seg_fds[forknum][0]);
1351
1352 FileClose(v->mdfd_vfd);
1353 _fdvec_resize(reln, forknum, curopensegs - 1);
1354 }
1355 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1356 {
1357 /*
1358 * This is the last segment we want to keep. Truncate the file to
1359 * the right length. NOTE: if nblocks is exactly a multiple K of
1360 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1361 * keep it. This adheres to the invariant given in the header
1362 * comments.
1363 */
1365
1367 ereport(ERROR,
1369 errmsg("could not truncate file \"%s\" to %u blocks: %m",
1371 nblocks)));
1372 if (!SmgrIsTemp(reln))
1373 register_dirty_segment(reln, forknum, v);
1374 }
1375 else
1376 {
1377 /*
1378 * We still need this segment, so nothing to do for this and any
1379 * earlier segment.
1380 */
1381 break;
1382 }
1383 curopensegs--;
1384 }
1385}
int FileTruncate(File file, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2465

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), FileClose(), FilePathName(), FileTruncate(), InRecovery, _MdfdVec::mdfd_vfd, register_dirty_segment(), relpath, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
extern

Definition at line 338 of file md.c.

339{
340 /* Now do the per-fork work */
341 if (forknum == InvalidForkNumber)
342 {
343 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
344 mdunlinkfork(rlocator, forknum, isRedo);
345 }
346 else
347 mdunlinkfork(rlocator, forknum, isRedo);
348}
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition md.c:375

References fb(), InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)
extern

Definition at line 1957 of file md.c.

1958{
1959 RelPathStr p;
1960
1961 /* Compute the path. */
1962 p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1963 strlcpy(path, p.str, MAXPGPATH);
1964
1965 /* Try to unlink the file. */
1966 return unlink(path);
1967}
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146

References fb(), MAIN_FORKNUM, MAXPGPATH, relpathperm, FileTag::rlocator, RelPathStr::str, and strlcpy().

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)
extern

Definition at line 1176 of file md.c.

1178{
1180
1181 /*
1182 * Issue flush requests in as few requests as possible; have to split at
1183 * segment boundaries though, since those are actually separate files.
1184 */
1185 while (nblocks > 0)
1186 {
1187 BlockNumber nflush = nblocks;
1188 pgoff_t seekpos;
1189 MdfdVec *v;
1190 int segnum_start,
1191 segnum_end;
1192
1193 v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
1194 EXTENSION_DONT_OPEN);
1195
1196 /*
1197 * We might be flushing buffers of already removed relations, that's
1198 * ok, just ignore that case. If the segment file wasn't open already
1199 * (ie from a recent mdwrite()), then we don't want to re-open it, to
1200 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
1201 * us with a descriptor to a file that is about to be unlinked.
1202 */
1203 if (!v)
1204 return;
1205
1206 /* compute offset inside the current segment */
1207 segnum_start = blocknum / RELSEG_SIZE;
1208
1209 /* compute number of desired writes within the current segment */
1210 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1211 if (segnum_start != segnum_end)
1212 nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
1213
1214 Assert(nflush >= 1);
1215 Assert(nflush <= nblocks);
1216
1217 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1218
1220
1221 nblocks -= nflush;
1222 blocknum += nflush;
1223 }
1224}
void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info)
Definition fd.c:2123
#define EXTENSION_DONT_OPEN
Definition md.c:122

References _mdfd_getseg(), Assert, EXTENSION_DONT_OPEN, fb(), FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdwritev()

void mdwritev ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void **  buffers,
BlockNumber  nblocks,
bool  skipFsync 
)
extern

Definition at line 1071 of file md.c.

1073{
1074 /* This assert is too expensive to have on normally ... */
1075#ifdef CHECK_WRITE_VS_EXTEND
1076 Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum));
1077#endif
1078
1079 while (nblocks > 0)
1080 {
1081 struct iovec iov[PG_IOV_MAX];
1082 int iovcnt;
1083 pgoff_t seekpos;
1084 int nbytes;
1085 MdfdVec *v;
1088 size_t size_this_segment;
1089
1090 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
1091 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
1092
1093 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1094
1095 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1096
1098 Min(nblocks,
1099 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1101
1102 if (nblocks_this_segment != nblocks)
1103 elog(ERROR, "write crosses segment boundary");
1104
1105 iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
1108
1109 /*
1110 * Inner loop to continue after a short write. If the reason is that
1111 * we're out of disk space, a future attempt should get an ENOSPC
1112 * error from the kernel.
1113 */
1114 for (;;)
1115 {
1116 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
1117 reln->smgr_rlocator.locator.spcOid,
1118 reln->smgr_rlocator.locator.dbOid,
1119 reln->smgr_rlocator.locator.relNumber,
1120 reln->smgr_rlocator.backend);
1121 nbytes = FileWriteV(v->mdfd_vfd, iov, iovcnt, seekpos,
1123 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1124 reln->smgr_rlocator.locator.spcOid,
1125 reln->smgr_rlocator.locator.dbOid,
1126 reln->smgr_rlocator.locator.relNumber,
1127 reln->smgr_rlocator.backend,
1128 nbytes,
1130
1131#ifdef SIMULATE_SHORT_WRITE
1132 nbytes = Min(nbytes, 4096);
1133#endif
1134
1135 if (nbytes < 0)
1136 {
1137 bool enospc = errno == ENOSPC;
1138
1139 ereport(ERROR,
1141 errmsg("could not write blocks %u..%u in file \"%s\": %m",
1142 blocknum,
1143 blocknum + nblocks_this_segment - 1,
1145 enospc ? errhint("Check free disk space.") : 0));
1146 }
1147
1148 /* One loop should usually be enough. */
1149 transferred_this_segment += nbytes;
1152 break;
1153
1154 /* Adjust position and iovecs after a short write. */
1155 seekpos += nbytes;
1157 }
1158
1159 if (!skipFsync && !SmgrIsTemp(reln))
1160 register_dirty_segment(reln, forknum, v);
1161
1162 nblocks -= nblocks_this_segment;
1163 buffers += nblocks_this_segment;
1164 blocknum += nblocks_this_segment;
1165 }
1166}
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2231

References _mdfd_getseg(), Assert, buffers_to_iovec(), compute_remaining_iovec(), elog, ereport, errcode_for_file_access(), errhint(), errmsg, ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileWriteV(), lengthof, _MdfdVec::mdfd_vfd, mdnblocks(), Min, PG_IOV_MAX, register_dirty_segment(), and SmgrIsTemp.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)
extern

Definition at line 553 of file md.c.

555{
556 MdfdVec *v;
557 BlockNumber curblocknum = blocknum;
558 int remblocks = nblocks;
559
560 Assert(nblocks > 0);
561
562 /* This assert is too expensive to have on normally ... */
563#ifdef CHECK_WRITE_VS_EXTEND
564 Assert(blocknum >= mdnblocks(reln, forknum));
565#endif
566
567 /*
568 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
569 * more --- we mustn't create a block whose number actually is
570 * InvalidBlockNumber or larger.
571 */
572 if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
575 errmsg("cannot extend file \"%s\" beyond %u blocks",
576 relpath(reln->smgr_rlocator, forknum).str,
578
579 while (remblocks > 0)
580 {
582 pgoff_t seekpos = (pgoff_t) BLCKSZ * segstartblock;
583 int numblocks;
584
587 else
589
591
594
595 /*
596 * If available and useful, use posix_fallocate() (via
597 * FileFallocate()) to extend the relation. That's often more
598 * efficient than using write(), as it commonly won't cause the kernel
599 * to allocate page cache space for the extended pages.
600 *
601 * However, we don't use FileFallocate() for small extensions, as it
602 * defeats delayed allocation on some filesystems. Not clear where
603 * that decision should be made though? For now just use a cutoff of
604 * 8, anything between 4 and 8 worked OK in some local testing.
605 */
606 if (numblocks > 8 &&
608 {
609 int ret = 0;
610
611#ifdef HAVE_POSIX_FALLOCATE
613 {
614 ret = FileFallocate(v->mdfd_vfd,
615 seekpos, (pgoff_t) BLCKSZ * numblocks,
617 }
618 else
619#endif
620 {
621 elog(ERROR, "unsupported file_extend_method: %d",
623 }
624 if (ret != 0)
625 {
628 errmsg("could not extend file \"%s\" with FileFallocate(): %m",
630 errhint("Check free disk space."));
631 }
632 }
633 else
634 {
635 int ret;
636
637 /*
638 * Even if we don't want to use fallocate, we can still extend a
639 * bit more efficiently than writing each 8kB block individually.
640 * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
641 * to avoid multiple writes or needing a zeroed buffer for the
642 * whole length of the extension.
643 */
644 ret = FileZero(v->mdfd_vfd,
645 seekpos, (pgoff_t) BLCKSZ * numblocks,
647 if (ret < 0)
650 errmsg("could not extend file \"%s\": %m",
652 errhint("Check free disk space."));
653 }
654
655 if (!skipFsync && !SmgrIsTemp(reln))
656 register_dirty_segment(reln, forknum, v);
657
658 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
659
662 }
663}
int file_extend_method
Definition fd.c:169
int FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2408
int FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2363
@ FILE_EXTEND_METHOD_WRITE_ZEROS
Definition fd.h:63

References _mdfd_getseg(), _mdnblocks(), Assert, elog, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg, ERROR, EXTENSION_CREATE, fb(), file_extend_method, FILE_EXTEND_METHOD_WRITE_ZEROS, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, and SmgrIsTemp.

Variable Documentation

◆ aio_md_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
extern

Definition at line 170 of file md.c.

170 {
171 .complete_shared = md_readv_complete,
172 .report = md_readv_report,
173};
static void md_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
Definition md.c:2057
static PgAioResult md_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
Definition md.c:1990