PostgreSQL Source Code git master
Loading...
Searching...
No Matches
sync.h File Reference
Include dependency graph for sync.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  FileTag
 

Typedefs

typedef enum SyncRequestType SyncRequestType
 
typedef enum SyncRequestHandler SyncRequestHandler
 
typedef struct FileTag FileTag
 

Enumerations

enum  SyncRequestType { SYNC_REQUEST , SYNC_UNLINK_REQUEST , SYNC_FORGET_REQUEST , SYNC_FILTER_REQUEST }
 
enum  SyncRequestHandler {
  SYNC_HANDLER_MD = 0 , SYNC_HANDLER_CLOG , SYNC_HANDLER_COMMIT_TS , SYNC_HANDLER_MULTIXACT_OFFSET ,
  SYNC_HANDLER_MULTIXACT_MEMBER , SYNC_HANDLER_NONE
}
 

Functions

void InitSync (void)
 
void SyncPreCheckpoint (void)
 
void SyncPostCheckpoint (void)
 
void ProcessSyncRequests (void)
 
void RememberSyncRequest (const FileTag *ftag, SyncRequestType type)
 
bool RegisterSyncRequest (const FileTag *ftag, SyncRequestType type, bool retryOnError)
 

Typedef Documentation

◆ FileTag

◆ SyncRequestHandler

◆ SyncRequestType

Enumeration Type Documentation

◆ SyncRequestHandler

Enumerator
SYNC_HANDLER_MD 
SYNC_HANDLER_CLOG 
SYNC_HANDLER_COMMIT_TS 
SYNC_HANDLER_MULTIXACT_OFFSET 
SYNC_HANDLER_MULTIXACT_MEMBER 
SYNC_HANDLER_NONE 

Definition at line 35 of file sync.h.

36{
SyncRequestHandler
Definition sync.h:36
@ SYNC_HANDLER_MD
Definition sync.h:37
@ SYNC_HANDLER_COMMIT_TS
Definition sync.h:39
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition sync.h:41
@ SYNC_HANDLER_CLOG
Definition sync.h:38
@ SYNC_HANDLER_NONE
Definition sync.h:42
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition sync.h:40

◆ SyncRequestType

Enumerator
SYNC_REQUEST 
SYNC_UNLINK_REQUEST 
SYNC_FORGET_REQUEST 
SYNC_FILTER_REQUEST 

Definition at line 23 of file sync.h.

24{
25 SYNC_REQUEST, /* schedule a call of sync function */
26 SYNC_UNLINK_REQUEST, /* schedule a call of unlink function */
27 SYNC_FORGET_REQUEST, /* forget all calls for a tag */
28 SYNC_FILTER_REQUEST, /* forget all calls satisfying match fn */
SyncRequestType
Definition sync.h:24
@ SYNC_FILTER_REQUEST
Definition sync.h:28
@ SYNC_FORGET_REQUEST
Definition sync.h:27
@ SYNC_UNLINK_REQUEST
Definition sync.h:26
@ SYNC_REQUEST
Definition sync.h:25

Function Documentation

◆ InitSync()

void InitSync ( void  )
extern

Definition at line 125 of file sync.c.

126{
127 /*
128 * Create pending-operations hashtable if we need it. Currently, we need
129 * it if we are standalone (not under a postmaster) or if we are a
130 * checkpointer auxiliary process.
131 */
133 {
135
136 /*
137 * XXX: The checkpointer needs to add entries to the pending ops table
138 * when absorbing fsync requests. That is done within a critical
139 * section, which isn't usually allowed, but we make an exception. It
140 * means that there's a theoretical possibility that you run out of
141 * memory while absorbing fsync requests, which leads to a PANIC.
142 * Fortunately the hash table is small so that's unlikely to happen in
143 * practice.
144 */
146 "Pending ops context",
149
150 hash_ctl.keysize = sizeof(FileTag);
151 hash_ctl.entrysize = sizeof(PendingFsyncEntry);
152 hash_ctl.hcxt = pendingOpsCxt;
153 pendingOps = hash_create("Pending Ops Table",
154 100L,
155 &hash_ctl,
158 }
159}
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
bool IsUnderPostmaster
Definition globals.c:120
#define HASH_CONTEXT
Definition hsearch.h:102
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
MemoryContext TopMemoryContext
Definition mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition mcxt.c:743
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define AmCheckpointerProcess()
Definition miscadmin.h:389
#define NIL
Definition pg_list.h:68
static int fb(int x)
Definition sync.h:51
static List * pendingUnlinks
Definition sync.c:72
static HTAB * pendingOps
Definition sync.c:71
static MemoryContext pendingOpsCxt
Definition sync.c:73

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, AmCheckpointerProcess, fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, IsUnderPostmaster, MemoryContextAllowInCriticalSection(), NIL, pendingOps, pendingOpsCxt, pendingUnlinks, and TopMemoryContext.

Referenced by BaseInit().

◆ ProcessSyncRequests()

void ProcessSyncRequests ( void  )
extern

Definition at line 287 of file sync.c.

288{
289 static bool sync_in_progress = false;
290
292 PendingFsyncEntry *entry;
293 int absorb_counter;
294
295 /* Statistics on sync times */
296 int processed = 0;
298 sync_end,
299 sync_diff;
301 uint64 longest = 0;
303
304 /*
305 * This is only called during checkpoints, and checkpoints should only
306 * occur in processes that have created a pendingOps.
307 */
308 if (!pendingOps)
309 elog(ERROR, "cannot sync without a pendingOps table");
310
311 /*
312 * If we are in the checkpointer, the sync had better include all fsync
313 * requests that were queued by backends up to this point. The tightest
314 * race condition that could occur is that a buffer that must be written
315 * and fsync'd for the checkpoint could have been dumped by a backend just
316 * before it was visited by BufferSync(). We know the backend will have
317 * queued an fsync request before clearing the buffer's dirtybit, so we
318 * are safe as long as we do an Absorb after completing BufferSync().
319 */
321
322 /*
323 * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
324 * checkpoint), we want to ignore fsync requests that are entered into the
325 * hashtable after this point --- they should be processed next time,
326 * instead. We use sync_cycle_ctr to tell old entries apart from new
327 * ones: new ones will have cycle_ctr equal to the incremented value of
328 * sync_cycle_ctr.
329 *
330 * In normal circumstances, all entries present in the table at this point
331 * will have cycle_ctr exactly equal to the current (about to be old)
332 * value of sync_cycle_ctr. However, if we fail partway through the
333 * fsync'ing loop, then older values of cycle_ctr might remain when we
334 * come back here to try again. Repeated checkpoint failures would
335 * eventually wrap the counter around to the point where an old entry
336 * might appear new, causing us to skip it, possibly allowing a checkpoint
337 * to succeed that should not have. To forestall wraparound, any time the
338 * previous ProcessSyncRequests() failed to complete, run through the
339 * table and forcibly set cycle_ctr = sync_cycle_ctr.
340 *
341 * Think not to merge this loop with the main loop, as the problem is
342 * exactly that that loop may fail before having visited all the entries.
343 * From a performance point of view it doesn't matter anyway, as this path
344 * will never be taken in a system that's functioning normally.
345 */
347 {
348 /* prior try failed, so update any stale cycle_ctr values */
350 while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
351 {
352 entry->cycle_ctr = sync_cycle_ctr;
353 }
354 }
355
356 /* Advance counter so that new hashtable entries are distinguishable */
358
359 /* Set flag to detect failure if we don't reach the end of the loop */
360 sync_in_progress = true;
361
362 /* Now scan the hashtable for fsync requests to process */
365 while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
366 {
367 int failures;
368
369 /*
370 * If the entry is new then don't process it this time; leave it for the next cycle.
371 * Note "continue" bypasses the hash-remove call at the bottom of the
372 * loop.
373 */
374 if (entry->cycle_ctr == sync_cycle_ctr)
375 continue;
376
377 /* Else assert we haven't missed it */
378 Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
379
380 /*
381 * If fsync is off then we don't have to bother opening the file at
382 * all. (We delay checking until this point so that changing fsync on
383 * the fly behaves sensibly.)
384 */
385 if (enableFsync)
386 {
387 /*
388 * If in checkpointer, we want to absorb pending requests every so
389 * often to prevent overflow of the fsync request queue. It is
390 * unspecified whether newly-added entries will be visited by
391 * hash_seq_search, but we don't care since we don't need to
392 * process them anyway.
393 */
394 if (--absorb_counter <= 0)
395 {
398 }
399
400 /*
401 * The fsync table could contain requests to fsync segments that
402 * have been deleted (unlinked) by the time we get to them. Rather
403 * than just hoping an ENOENT (or EACCES on Windows) error can be
404 * ignored, what we do on error is absorb pending requests and
405 * then retry. Since mdunlink() queues a "cancel" message before
406 * actually unlinking, the fsync request is guaranteed to be
407 * marked canceled after the absorb if it really was this case.
408 * DROP DATABASE likewise has to tell us to forget fsync requests
409 * before it starts deletions.
410 */
411 for (failures = 0; !entry->canceled; failures++)
412 {
413 char path[MAXPGPATH];
414
416 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
417 path) == 0)
418 {
419 /* Success; update statistics about sync timing */
424 if (elapsed > longest)
427 processed++;
428
429 if (log_checkpoints)
430 elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
431 processed,
432 path,
433 (double) elapsed / 1000);
434
435 break; /* out of retry loop */
436 }
437
438 /*
439 * It is possible that the relation has been dropped or
440 * truncated since the fsync request was entered. Therefore,
441 * allow ENOENT, but only if we didn't fail already on this
442 * file.
443 */
444 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
447 errmsg("could not fsync file \"%s\": %m",
448 path)));
449 else
452 errmsg_internal("could not fsync file \"%s\" but retrying: %m",
453 path)));
454
455 /*
456 * Absorb incoming requests and check to see if a cancel
457 * arrived for this relation fork.
458 */
460 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
461 } /* end retry loop */
462 }
463
464 /* We are done with this entry, remove it */
465 if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
466 elog(ERROR, "pendingOps corrupted");
467 } /* end loop over hashtable entries */
468
469 /* Return sync performance metrics for report at checkpoint end */
473
474 /* Flag successful completion of ProcessSyncRequests */
475 sync_in_progress = false;
476}
#define Assert(condition)
Definition c.h:945
uint64_t uint64
Definition c.h:619
void AbsorbSyncRequests(void)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
int errcode_for_file_access(void)
Definition elog.c:897
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
int data_sync_elevel(int elevel)
Definition fd.c:3986
#define FILE_POSSIBLY_DELETED(err)
Definition fd.h:89
bool enableFsync
Definition globals.c:129
@ HASH_REMOVE
Definition hsearch.h:115
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:177
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:192
static char * errmsg
#define MAXPGPATH
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
Definition rege_dfa.c:42
uint64 ckpt_agg_sync_time
Definition xlog.h:187
uint64 ckpt_longest_sync
Definition xlog.h:186
int16 handler
Definition sync.h:52
FileTag tag
Definition sync.c:59
CycleCtr cycle_ctr
Definition sync.c:60
bool canceled
Definition sync.c:61
int(* sync_syncfiletag)(const FileTag *ftag, char *path)
Definition sync.c:87
static const SyncOps syncsw[]
Definition sync.c:96
static CycleCtr sync_cycle_ctr
Definition sync.c:75
#define FSYNCS_PER_ABSORB
Definition sync.c:79
uint16 CycleCtr
Definition sync.c:55
bool log_checkpoints
Definition xlog.c:133
CheckpointStatsData CheckpointStats
Definition xlog.c:213

References AbsorbSyncRequests(), Assert, PendingFsyncEntry::canceled, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_sync_rels, PendingFsyncEntry::cycle_ctr, data_sync_elevel(), DEBUG1, elog, enableFsync, ereport, errcode_for_file_access(), errmsg, errmsg_internal(), ERROR, fb(), FILE_POSSIBLY_DELETED, FSYNCS_PER_ABSORB, FileTag::handler, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, log_checkpoints, longest(), MAXPGPATH, pendingOps, sync_cycle_ctr, SyncOps::sync_syncfiletag, syncsw, and PendingFsyncEntry::tag.

Referenced by CheckPointGuts().

◆ RegisterSyncRequest()

bool RegisterSyncRequest ( const FileTag * ftag,
SyncRequestType  type,
bool  retryOnError 
)
extern

Definition at line 581 of file sync.c.

583{
584 bool ret;
585
586 if (pendingOps != NULL)
587 {
588 /* standalone backend or startup process: fsync state is local */
590 return true;
591 }
592
593 for (;;)
594 {
595 /*
596 * Notify the checkpointer about it. If we fail to queue a message in
597 * retryOnError mode, we have to sleep and try again ... ugly, but
598 * hopefully won't happen often.
599 *
600 * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with an
601 * error in the case of SYNC_UNLINK_REQUEST would leave the
602 * no-longer-used file still present on disk, which would be bad, so
603 * I'm inclined to assume that the checkpointer will always empty the
604 * queue soon.
605 */
606 ret = ForwardSyncRequest(ftag, type);
607
608 /*
609 * If we are successful in queueing the request, or we failed and were
610 * instructed not to retry on error, break.
611 */
612 if (ret || (!ret && !retryOnError))
613 break;
614
617 }
618
619 return ret;
620}
bool ForwardSyncRequest(const FileTag *ftag, SyncRequestType type)
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
void RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
Definition sync.c:488
const char * type
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH

References fb(), ForwardSyncRequest(), pendingOps, RememberSyncRequest(), type, WaitLatch(), WL_EXIT_ON_PM_DEATH, and WL_TIMEOUT.

Referenced by ForgetDatabaseSyncRequests(), register_dirty_segment(), register_forget_request(), register_unlink_segment(), SlruInternalDeleteSegment(), and SlruPhysicalWritePage().

◆ RememberSyncRequest()

void RememberSyncRequest ( const FileTag * ftag,
SyncRequestType  type 
)
extern

Definition at line 488 of file sync.c.

489{
491
493 {
494 PendingFsyncEntry *entry;
495
496 /* Cancel previously entered request */
498 ftag,
499 HASH_FIND,
500 NULL);
501 if (entry != NULL)
502 entry->canceled = true;
503 }
504 else if (type == SYNC_FILTER_REQUEST)
505 {
508 ListCell *cell;
509
510 /* Cancel matching fsync requests */
512 while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
513 {
514 if (pfe->tag.handler == ftag->handler &&
515 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
516 pfe->canceled = true;
517 }
518
519 /* Cancel matching unlink requests */
520 foreach(cell, pendingUnlinks)
521 {
523
524 if (pue->tag.handler == ftag->handler &&
525 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
526 pue->canceled = true;
527 }
528 }
529 else if (type == SYNC_UNLINK_REQUEST)
530 {
531 /* Unlink request: put it in the linked list */
533 PendingUnlinkEntry *entry;
534
536 entry->tag = *ftag;
538 entry->canceled = false;
539
541
543 }
544 else
545 {
546 /* Normal case: enter a request to fsync this segment */
548 PendingFsyncEntry *entry;
549 bool found;
550
552
554 ftag,
556 &found);
557 /* if new entry, or was previously canceled, initialize it */
558 if (!found || entry->canceled)
559 {
560 entry->cycle_ctr = sync_cycle_ctr;
561 entry->canceled = false;
562 }
563
564 /*
565 * NB: it's intentional that we don't change cycle_ctr if the entry
566 * already exists. The cycle_ctr must represent the oldest fsync
567 * request that could be in the entry.
568 */
569
571 }
572}
#define palloc_object(type)
Definition fe_memutils.h:74
@ HASH_FIND
Definition hsearch.h:113
@ HASH_ENTER
Definition hsearch.h:114
List * lappend(List *list, void *datum)
Definition list.c:339
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define lfirst(lc)
Definition pg_list.h:172
FileTag tag
Definition sync.c:66
CycleCtr cycle_ctr
Definition sync.c:67
bool(* sync_filetagmatches)(const FileTag *ftag, const FileTag *candidate)
Definition sync.c:89
static CycleCtr checkpoint_cycle_ctr
Definition sync.c:76

References Assert, PendingFsyncEntry::canceled, PendingUnlinkEntry::canceled, checkpoint_cycle_ctr, PendingFsyncEntry::cycle_ctr, PendingUnlinkEntry::cycle_ctr, fb(), FileTag::handler, HASH_ENTER, HASH_FIND, hash_search(), hash_seq_init(), hash_seq_search(), lappend(), lfirst, MemoryContextSwitchTo(), palloc_object, pendingOps, pendingOpsCxt, pendingUnlinks, sync_cycle_ctr, SyncOps::sync_filetagmatches, SYNC_FILTER_REQUEST, SYNC_FORGET_REQUEST, SYNC_REQUEST, SYNC_UNLINK_REQUEST, syncsw, PendingUnlinkEntry::tag, and type.

Referenced by AbsorbSyncRequests(), and RegisterSyncRequest().

◆ SyncPostCheckpoint()

void SyncPostCheckpoint ( void  )
extern

Definition at line 203 of file sync.c.

204{
205 int absorb_counter;
206 ListCell *lc;
207
209 foreach(lc, pendingUnlinks)
210 {
212 char path[MAXPGPATH];
213
214 /* Skip over any canceled entries */
215 if (entry->canceled)
216 continue;
217
218 /*
219 * New entries are appended to the end, so if the entry is new we've
220 * reached the end of old entries.
221 *
222 * Note: if just the right number of consecutive checkpoints fail, we
223 * could be fooled here by cycle_ctr wraparound. However, the only
224 * consequence is that we'd delay unlinking for one more checkpoint,
225 * which is perfectly tolerable.
226 */
227 if (entry->cycle_ctr == checkpoint_cycle_ctr)
228 break;
229
230 /* Unlink the file */
231 if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
232 path) < 0)
233 {
234 /*
235 * There's a race condition, when the database is dropped at the
236 * same time that we process the pending unlink requests. If the
237 * DROP DATABASE deletes the file before we do, we will get ENOENT
238 * here. rmtree() also has to ignore ENOENT errors, to deal with
239 * the possibility that we delete the file first.
240 */
241 if (errno != ENOENT)
244 errmsg("could not remove file \"%s\": %m", path)));
245 }
246
247 /* Mark the list entry as canceled, just in case */
248 entry->canceled = true;
249
250 /*
251 * As in ProcessSyncRequests, we don't want to stop absorbing fsync
252 * requests for a long time when there are many deletions to be done.
253 * We can safely call AbsorbSyncRequests() at this point in the loop.
254 */
255 if (--absorb_counter <= 0)
256 {
259 }
260 }
261
262 /*
263 * If we reached the end of the list, we can just remove the whole list
264 * (remembering to pfree all the PendingUnlinkEntry objects). Otherwise,
265 * we must keep the entries at or after "lc".
266 */
267 if (lc == NULL)
268 {
271 }
272 else
273 {
274 int ntodelete = list_cell_number(pendingUnlinks, lc);
275
276 for (int i = 0; i < ntodelete; i++)
278
280 }
281}
#define WARNING
Definition elog.h:36
int i
Definition isn.c:77
List * list_delete_first_n(List *list, int n)
Definition list.c:983
void list_free_deep(List *list)
Definition list.c:1560
void pfree(void *pointer)
Definition mcxt.c:1616
static void * list_nth(const List *list, int n)
Definition pg_list.h:299
static int list_cell_number(const List *l, const ListCell *c)
Definition pg_list.h:333
int(* sync_unlinkfiletag)(const FileTag *ftag, char *path)
Definition sync.c:88
#define UNLINKS_PER_ABSORB
Definition sync.c:80

References AbsorbSyncRequests(), PendingUnlinkEntry::canceled, checkpoint_cycle_ctr, PendingUnlinkEntry::cycle_ctr, ereport, errcode_for_file_access(), errmsg, fb(), FileTag::handler, i, lfirst, list_cell_number(), list_delete_first_n(), list_free_deep(), list_nth(), MAXPGPATH, NIL, pendingUnlinks, pfree(), SyncOps::sync_unlinkfiletag, syncsw, PendingUnlinkEntry::tag, UNLINKS_PER_ABSORB, and WARNING.

Referenced by CreateCheckPoint().

◆ SyncPreCheckpoint()

void SyncPreCheckpoint ( void  )
extern

Definition at line 178 of file sync.c.

179{
180 /*
181 * Operations such as DROP TABLESPACE assume that the next checkpoint will
182 * process all recently forwarded unlink requests, but if they aren't
183 * absorbed prior to advancing the cycle counter, they won't be processed
184 * until a future checkpoint. The following absorb ensures that any
185 * unlink requests forwarded before the checkpoint began will be processed
186 * in the current checkpoint.
187 */
189
190 /*
191 * Any unlink requests arriving after this point will be assigned the next
192 * cycle counter, and won't be unlinked until next checkpoint.
193 */
195}

References AbsorbSyncRequests(), and checkpoint_cycle_ctr.

Referenced by CreateCheckPoint().