PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
standby.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * standby.c
4 * Misc functions used in Hot Standby mode.
5 *
6 * All functions for handling RM_STANDBY_ID, which relate to
7 * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8 * Plus conflict recovery processing.
9 *
10 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 * IDENTIFICATION
14 * src/backend/storage/ipc/standby.c
15 *
16 *-------------------------------------------------------------------------
17 */
18#include "postgres.h"
19#include "access/transam.h"
20#include "access/twophase.h"
21#include "access/xact.h"
22#include "access/xloginsert.h"
23#include "access/xlogrecovery.h"
24#include "access/xlogutils.h"
25#include "miscadmin.h"
26#include "pgstat.h"
27#include "replication/slot.h"
28#include "storage/bufmgr.h"
29#include "storage/proc.h"
30#include "storage/procarray.h"
31#include "storage/sinvaladt.h"
32#include "storage/standby.h"
33#include "utils/hsearch.h"
35#include "utils/ps_status.h"
36#include "utils/timeout.h"
37#include "utils/timestamp.h"
38
39/* User-settable GUC parameters */
43
44/*
45 * Keep track of all the exclusive locks owned by original transactions.
46 * For each known exclusive lock, there is a RecoveryLockEntry in the
47 * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
48 * given XID are chained together so that we can find them easily.
49 * For each original transaction that is known to have any such locks,
50 * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
51 * which stores the head of the chain of its locks.
52 */
53typedef struct RecoveryLockEntry
54{
55 xl_standby_lock key; /* hash key: xid, dbOid, relOid */
56 struct RecoveryLockEntry *next; /* chain link */
58
60{
61 TransactionId xid; /* hash key -- must be first */
62 struct RecoveryLockEntry *head; /* chain head */
64
65static HTAB *RecoveryLockHash = NULL;
67
68/* Flags set by timeout handlers */
69static volatile sig_atomic_t got_standby_deadlock_timeout = false;
70static volatile sig_atomic_t got_standby_delay_timeout = false;
71static volatile sig_atomic_t got_standby_lock_timeout = false;
72
74 ProcSignalReason reason,
75 uint32 wait_event_info,
76 bool report_waiting);
79static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
80static const char *get_recovery_conflict_desc(ProcSignalReason reason);
81
82/*
83 * InitRecoveryTransactionEnvironment
84 * Initialize tracking of our primary's in-progress transactions.
85 *
86 * We need to issue shared invalidations and hold locks. Holding locks
87 * means others may want to wait on us, so we need to make a lock table
88 * vxact entry like a real transaction. We could create and delete
89 * lock table entries for each transaction but it's simpler just to create
90 * one permanent entry and leave it there all the time. Locks are then
91 * acquired and released as needed. Yes, this means you can see the
92 * Startup process in pg_locks once we have run this.
93 */
94void
96{
98 HASHCTL hash_ctl;
99
100 Assert(RecoveryLockHash == NULL); /* don't run this twice */
101
102 /*
103 * Initialize the hash tables for tracking the locks held by each
104 * transaction.
105 */
106 hash_ctl.keysize = sizeof(xl_standby_lock);
107 hash_ctl.entrysize = sizeof(RecoveryLockEntry);
108 RecoveryLockHash = hash_create("RecoveryLockHash",
109 64,
110 &hash_ctl,
112 hash_ctl.keysize = sizeof(TransactionId);
113 hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
114 RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
115 64,
116 &hash_ctl,
118
119 /*
120 * Initialize shared invalidation management for Startup process, being
121 * careful to register ourselves as a sendOnly process so we don't need to
122 * read messages, nor will we get signaled when the queue starts filling
123 * up.
124 */
126
127 /*
128 * Lock a virtual transaction id for Startup process.
129 *
130 * We need to do GetNextLocalTransactionId() because
131 * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
132 * manager doesn't like that at all.
133 *
134 * Note that we don't need to run XactLockTableInsert() because nobody
135 * needs to wait on xids. That sounds a little strange, but table locks
136 * are held by vxids and row level locks are held by xids. All queries
137 * hold AccessShareLocks so never block while we write or lock new rows.
138 */
143
145}
146
147/*
148 * ShutdownRecoveryTransactionEnvironment
149 * Shut down transaction tracking
150 *
151 * Prepare to switch from hot standby mode to normal operation. Shut down
152 * recovery-time transaction tracking.
153 *
154 * This must be called even in shutdown of startup process if transaction
155 * tracking has been initialized. Otherwise some locks the tracked
156 * transactions were holding will not be released and may interfere with
157 * the processes still running (but which will exit soon) at the exit of
158 * startup process.
159 */
160void
162{
163 /*
164 * Do nothing if RecoveryLockHash is NULL because that means that
165 * transaction tracking has not yet been initialized or has already been
166 * shut down. This makes it safe to have possibly-redundant calls of this
167 * function during process exit.
168 */
169 if (RecoveryLockHash == NULL)
170 return;
171
172 /* Mark all tracked in-progress transactions as finished. */
174
175 /* Release all locks the tracked transactions were holding */
177
178 /* Destroy the lock hash tables. */
181 RecoveryLockHash = NULL;
182 RecoveryLockXidHash = NULL;
183
184 /* Cleanup our VirtualTransaction */
186}
187
188
189/*
190 * -----------------------------------------------------
191 * Standby wait timers and backend cancel logic
192 * -----------------------------------------------------
193 */
194
195/*
196 * Determine the cutoff time at which we want to start canceling conflicting
197 * transactions. Returns zero (a time safely in the past) if we are willing
198 * to wait forever.
199 */
200static TimestampTz
202{
203 TimestampTz rtime;
204 bool fromStream;
205
206 /*
207 * The cutoff time is the last WAL data receipt time plus the appropriate
208 * delay variable. Delay of -1 means wait forever.
209 */
210 GetXLogReceiptTime(&rtime, &fromStream);
211 if (fromStream)
212 {
214 return 0; /* wait forever */
216 }
217 else
218 {
220 return 0; /* wait forever */
222 }
223}
224
225#define STANDBY_INITIAL_WAIT_US 1000
227
228/*
229 * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
230 * We wait here for a while then return. If we decide we can't wait any
231 * more then we return true, if we can wait some more return false.
232 */
233static bool
235{
236 TimestampTz ltime;
237
239
240 /* Are we past the limit time? */
241 ltime = GetStandbyLimitTime();
242 if (ltime && GetCurrentTimestamp() >= ltime)
243 return true;
244
245 /*
246 * Sleep a bit (this is essential to avoid busy-waiting).
247 */
248 pgstat_report_wait_start(wait_event_info);
251
252 /*
253 * Progressively increase the sleep times, but not to more than 1s, since
254 * pg_usleep isn't interruptible on some platforms.
255 */
256 standbyWait_us *= 2;
257 if (standbyWait_us > 1000000)
258 standbyWait_us = 1000000;
259
260 return false;
261}
262
263/*
264 * Log the recovery conflict.
265 *
266 * wait_start is the timestamp when the caller started to wait.
267 * now is the timestamp at which this function was called.
268 * wait_list is the array of virtual transaction ids assigned to the
269 * conflicting processes, ended by an invalid entry (may be NULL).
270 * still_waiting tells whether the startup process is still waiting for
271 * the recovery conflict to be resolved (true) or is done waiting (false).
272 */
273void
276 bool still_waiting)
277{
278 long secs;
279 int usecs;
280 long msecs;
282 int nprocs = 0; /* number of active conflicting backends found */
283
284 /*
285 * There must be no conflicting processes when the recovery conflict has
286 * already been resolved.
287 */
288 Assert(still_waiting || wait_list == NULL);
289
290 TimestampDifference(wait_start, now, &secs, &usecs);
291 msecs = secs * 1000 + usecs / 1000; /* whole milliseconds waited */
292 usecs = usecs % 1000; /* leftover microseconds, printed after the dot */
293
294 if (wait_list)
295 {
297
298 /* Build a comma-separated string of the conflicting processes' PIDs */
299 vxids = wait_list;
300 while (VirtualTransactionIdIsValid(*vxids))
301 {
302 PGPROC *proc = ProcNumberGetProc(vxids->procNumber);
303
304 /* proc can be NULL if the target backend is not active */
305 if (proc)
306 {
307 if (nprocs == 0)
308 {
310 appendStringInfo(&buf, "%d", proc->pid);
311 }
312 else
313 appendStringInfo(&buf, ", %d", proc->pid);
314
315 nprocs++;
316 }
317
318 vxids++;
319 }
320 }
321
322 /*
323 * If wait_list is specified, report the list of PIDs of active
324 * conflicting backends in a detail message. Note that if none of the
325 * backends in the list is active, no detail message is logged.
326 */
327 if (still_waiting)
328 {
329 ereport(LOG,
330 errmsg("recovery still waiting after %ld.%03d ms: %s",
331 msecs, usecs, get_recovery_conflict_desc(reason)),
332 nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
333 "Conflicting processes: %s.",
334 nprocs, buf.data) : 0);
335 }
336 else
337 {
338 ereport(LOG,
339 errmsg("recovery finished waiting after %ld.%03d ms: %s",
340 msecs, usecs, get_recovery_conflict_desc(reason)));
341 }
342
343 if (nprocs > 0)
344 pfree(buf.data);
345}
346
347/*
348 * This is the main executioner for any query backend that conflicts with
349 * recovery processing. Judgement has already been passed on it within
350 * a specific rmgr. Here we just issue the orders to the procs. The procs
351 * then throw the required error as instructed.
352 *
353 * If report_waiting is true, "waiting" is reported in PS display and the
354 * wait for recovery conflict is reported in the log, if necessary. If
355 * the caller is responsible for reporting them, report_waiting should be
356 * false; otherwise both the caller and this function would report the
357 * same thing.
358 */
359static void
361 ProcSignalReason reason, uint32 wait_event_info,
362 bool report_waiting)
363{
364 TimestampTz waitStart = 0; /* stays 0 when no wait reporting is wanted */
365 bool waiting = false; /* true once "waiting" was added to PS display */
366 bool logged_recovery_conflict = false;
367
368 /* Fast exit, to avoid a kernel call if there's no work to be done. */
369 if (!VirtualTransactionIdIsValid(*waitlist))
370 return;
371
372 /* Set the wait start timestamp for reporting */
373 if (report_waiting && (log_recovery_conflict_waits || update_process_title))
374 waitStart = GetCurrentTimestamp();
375
376 while (VirtualTransactionIdIsValid(*waitlist))
377 {
378 /* reset standbyWait_us for each xact we wait for */
380
381 /* wait until the virtual xid is gone */
382 while (!VirtualXactLock(*waitlist, false))
383 {
384 /* Is it time to kill it? */
385 if (WaitExceedsMaxStandbyDelay(wait_event_info))
386 {
387 pid_t pid;
388
389 /*
390 * Time is up: cancel the owner of this virtual xid.
391 */
393 pid = CancelVirtualTransaction(*waitlist, reason);
394
395 /*
396 * Wait a little bit for it to die so that we avoid flooding
397 * an unresponsive backend when system is heavily loaded.
398 */
399 if (pid != 0)
400 pg_usleep(5000L);
401 }
402
403 if (waitStart != 0 && (!logged_recovery_conflict || !waiting))
404 {
405 TimestampTz now = 0;
406 bool maybe_log_conflict;
407 bool maybe_update_title;
408
409 maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
410 maybe_update_title = (update_process_title && !waiting);
411
412 /* Get the current timestamp if something is not yet reported */
413 if (maybe_log_conflict || maybe_update_title)
415
416 /*
417 * Report via ps if we have been waiting for more than 500
418 * msec (should that be configurable?)
419 */
420 if (maybe_update_title &&
421 TimestampDifferenceExceeds(waitStart, now, 500))
422 {
423 set_ps_display_suffix("waiting");
424 waiting = true;
425 }
426
427 /*
428 * Emit the log message if the startup process is waiting
429 * longer than deadlock_timeout for recovery conflict.
430 */
431 if (maybe_log_conflict &&
433 {
434 LogRecoveryConflict(reason, waitStart, now, waitlist, true);
435 logged_recovery_conflict = true;
436 }
437 }
438 }
439
440 /* The virtual transaction is gone now, wait for the next one */
441 waitlist++;
442 }
443
444 /*
445 * Emit the log message if recovery conflict was resolved but the startup
446 * process waited longer than deadlock_timeout for it.
447 */
448 if (logged_recovery_conflict)
449 LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
450 NULL, false);
451
452 /* reset ps display to remove the suffix if we added one */
453 if (waiting)
455
456}
457
458/*
459 * Generate whatever recovery conflicts are needed to eliminate snapshots that
460 * might see XIDs <= snapshotConflictHorizon as still running.
461 *
462 * snapshotConflictHorizon cutoffs are our standard approach to generating
463 * granular recovery conflicts. Note that InvalidTransactionId values are
464 * interpreted as "definitely don't need any conflicts" here, which is a
465 * general convention that WAL records can (and often do) depend on.
466 */
467void
469 bool isCatalogRel,
470 RelFileLocator locator)
471{
472 VirtualTransactionId *backends;
473
474 /*
475 * If we get passed InvalidTransactionId then we do nothing (no conflict).
476 *
477 * This can happen when replaying already-applied WAL records after a
478 * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
479 * record that marks as frozen a page which was already all-visible. It's
480 * also quite common with records generated during index deletion
481 * (original execution of the deletion can reason that a recovery conflict
482 * which is sufficient for the deletion operation must take place before
483 * replay of the deletion record itself).
484 */
485 if (!TransactionIdIsValid(snapshotConflictHorizon))
486 return;
487
488 Assert(TransactionIdIsNormal(snapshotConflictHorizon));
489 backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
490 locator.dbOid);
493 WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
494 true);
495
496 /*
497 * Note that WaitExceedsMaxStandbyDelay() is not taken into account here
498 * (as opposed to ResolveRecoveryConflictWithVirtualXIDs() above). That
499 * seems OK, given that this kind of conflict should not normally be
500 * reached, e.g. due to using a physical replication slot.
501 */
502 if (wal_level >= WAL_LEVEL_LOGICAL && isCatalogRel)
504 snapshotConflictHorizon);
505}
506
507/*
508 * Variant of ResolveRecoveryConflictWithSnapshot that works with
509 * FullTransactionId values
510 */
511void
513 bool isCatalogRel,
514 RelFileLocator locator)
515{
516 /*
517 * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
518 * so truncate the logged FullTransactionId. If the logged value is very
519 * old, so that XID wrap-around already happened on it, there can't be any
520 * snapshots that still see it.
521 */
523 uint64 diff;
524
525 diff = U64FromFullTransactionId(nextXid) -
526 U64FromFullTransactionId(snapshotConflictHorizon);
527 if (diff < MaxTransactionId / 2)
528 {
529 TransactionId truncated;
530
531 truncated = XidFromFullTransactionId(snapshotConflictHorizon);
533 isCatalogRel,
534 locator);
535 }
536}
537
538void
540{
541 VirtualTransactionId *temp_file_users;
542
543 /*
544 * Standby users may be currently using this tablespace for their
545 * temporary files. We only care about current users because
546 * temp_tablespace parameter will just ignore tablespaces that no longer
547 * exist.
548 *
549 * Ask everybody to cancel their queries immediately so we can ensure no
550 * temp files remain and we can remove the tablespace. Nuke the entire
551 * site from orbit, it's the only way to be sure.
552 *
553 * XXX: We could work out the pids of active backends using this
554 * tablespace by examining the temp filenames in the directory. We would
555 * then convert the pids into VirtualXIDs before attempting to cancel
556 * them.
557 *
558 * We don't wait for commit because drop tablespace is non-transactional.
559 */
561 InvalidOid);
564 WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
565 true);
566}
567
568void
570{
571 /*
572 * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
573 * only waits for transactions and completely idle sessions would block
574 * us. This is rare enough that we do this as simply as possible: no wait,
575 * just force them off immediately.
576 *
577 * No locking is required here because we already acquired
578 * AccessExclusiveLock. Anybody trying to connect while we do this will
579 * block during InitPostgres() and then disconnect when they see the
580 * database has been removed.
581 */
582 while (CountDBBackends(dbid) > 0)
583 {
585
586 /*
587 * Wait awhile for them to die so that we avoid flooding an
588 * unresponsive backend when system is heavily loaded.
589 */
590 pg_usleep(10000);
591 }
592}
593
594/*
595 * ResolveRecoveryConflictWithLock is called from ProcSleep()
596 * to resolve conflicts with other backends holding relation locks.
597 *
598 * The WaitLatch sleep normally done in ProcSleep()
599 * (when not InHotStandby) is performed here, for code clarity.
600 *
601 * We either resolve conflicts immediately or set a timeout to wake us at
602 * the limit of our patience.
603 *
604 * Resolve conflicts by canceling all backends holding a conflicting
605 * lock. As we are already queued to be granted the lock, no new lock
606 * requests conflicting with ours will be granted in the meantime.
607 *
608 * We also must check for deadlocks involving the Startup process and
609 * hot-standby backend processes. If deadlock_timeout is reached in
610 * this function, all the backends holding the conflicting locks are
611 * requested to check themselves for deadlocks.
612 *
613 * logging_conflict should be true if the recovery conflict has not been
614 * logged yet even though logging is enabled. After deadlock_timeout is
615 * reached and the request for deadlock check is sent, we wait again to
616 * be signaled by the release of the lock if logging_conflict is false.
617 * Otherwise we return without waiting again so that the caller can report
618 * the recovery conflict. In that case this function is then called again
619 * with logging_conflict=false (because the recovery conflict has already
620 * been logged) and we will wait again for the lock to be released.
621 */
622void
623ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
624{
625 TimestampTz ltime;
627
629
630 ltime = GetStandbyLimitTime();
632
633 /*
634 * Update waitStart if first time through after the startup process
635 * started waiting for the lock. It should not be updated every time
636 * ResolveRecoveryConflictWithLock() is called during the wait.
637 *
638 * Use the current time obtained for comparison with ltime as waitStart
639 * (i.e., the time when this process started waiting for the lock). Since
640 * getting the current time newly can cause overhead, we reuse the
641 * already-obtained time to avoid that overhead.
642 *
643 * Note that waitStart is updated without holding the lock table's
644 * partition lock, to avoid the overhead by additional lock acquisition.
645 * This can cause "waitstart" in pg_locks to become NULL for a very short
646 * period of time after the wait started even though "granted" is false.
647 * This is OK in practice because we can assume that users are likely to
648 * look at "waitstart" when waiting for the lock for a long time.
649 */
652
653 if (now >= ltime && ltime != 0)
654 {
655 /*
656 * We're already behind, so clear a path as quickly as possible.
657 */
658 VirtualTransactionId *backends;
659
660 backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
661
662 /*
663 * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
664 * "waiting" in PS display by disabling its argument report_waiting
665 * because the caller, WaitOnLock(), has already reported that.
666 */
669 PG_WAIT_LOCK | locktag.locktag_type,
670 false);
671 }
672 else
673 {
674 /*
675 * Wait (or wait again) until ltime, and check for deadlocks as well
676 * if we will be waiting longer than deadlock_timeout
677 */
678 EnableTimeoutParams timeouts[2]; /* limit timeout (optional) + deadlock timeout */
679 int cnt = 0;
680
681 if (ltime != 0)
682 {
684 timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
685 timeouts[cnt].type = TMPARAM_AT;
686 timeouts[cnt].fin_time = ltime;
687 cnt++;
688 }
689
691 timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
692 timeouts[cnt].type = TMPARAM_AFTER;
693 timeouts[cnt].delay_ms = DeadlockTimeout;
694 cnt++;
695
696 enable_timeouts(timeouts, cnt);
697 }
698
699 /* Wait to be signaled by the release of the Relation Lock */
701
702 /*
703 * Exit if ltime is reached. Then all the backends holding conflicting
704 * locks will be canceled in the next ResolveRecoveryConflictWithLock()
705 * call.
706 */
708 goto cleanup;
709
711 {
712 VirtualTransactionId *backends;
713
714 backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
715
716 /* Quick exit if there's no work to be done */
717 if (!VirtualTransactionIdIsValid(*backends))
718 goto cleanup;
719
720 /*
721 * Send signals to all the backends holding the conflicting locks, to
722 * ask them to check themselves for deadlocks.
723 */
724 while (VirtualTransactionIdIsValid(*backends))
725 {
726 SignalVirtualTransaction(*backends,
728 false);
729 backends++;
730 }
731
732 /*
733 * Exit if the recovery conflict has not been logged yet even though
734 * logging is enabled, so that the caller can log that. Then
735 * ResolveRecoveryConflictWithLock() is called again and we will wait
736 * again for the lock to be released.
737 */
738 if (logging_conflict)
739 goto cleanup;
740
741 /*
742 * Wait again here to be signaled by the release of the Relation Lock,
743 * to prevent the subsequent ResolveRecoveryConflictWithLock() from
744 * causing deadlock_timeout and sending a request for deadlocks check
745 * again. Otherwise the request continues to be sent every
746 * deadlock_timeout until the relation locks are released or ltime is
747 * reached.
747 */
750 }
751
752cleanup:
753
754 /*
755 * Clear any timeout requests established above. We assume here that the
756 * Startup process doesn't have any other outstanding timeouts than those
757 * used by this function. If that stops being true, we could cancel the
758 * timeouts individually, but that'd be slower.
759 */
763}
764
765/*
766 * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
767 * to resolve conflicts with other backends holding buffer pins.
768 *
769 * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
770 * (when not InHotStandby) is performed here, for code clarity.
771 *
772 * We either resolve conflicts immediately or set a timeout to wake us at
773 * the limit of our patience.
774 *
775 * Resolve conflicts by sending a PROCSIG signal to all backends to check if
776 * they hold one of the buffer pins that is blocking Startup process. If so,
777 * those backends will take an appropriate error action, ERROR or FATAL.
778 *
779 * We also must check for deadlocks. Deadlocks occur because if queries
780 * wait on a lock, that must be behind an AccessExclusiveLock, which can only
781 * be cleared if the Startup process replays a transaction completion record.
782 * If Startup process is also waiting then that is a deadlock. The deadlock
783 * can occur if the query is waiting and then the Startup sleeps, or if
784 * Startup is sleeping and the query waits on a lock. We protect against
785 * only the former sequence here, the latter sequence is checked prior to
786 * the query sleeping, in CheckRecoveryConflictDeadlock().
787 *
788 * Deadlocks are extremely rare, and relatively expensive to check for,
789 * so we don't do a deadlock check right away ... only if we have had to wait
790 * at least deadlock_timeout.
791 */
792void
794{
795 TimestampTz ltime;
796
798
799 ltime = GetStandbyLimitTime();
800
801 if (GetCurrentTimestamp() >= ltime && ltime != 0)
802 {
803 /*
804 * We're already behind, so clear a path as quickly as possible.
805 */
807 }
808 else
809 {
810 /*
811 * Wake up at ltime, and check for deadlocks as well if we will be
812 * waiting longer than deadlock_timeout
813 */
814 EnableTimeoutParams timeouts[2]; /* limit timeout (optional) + deadlock timeout */
815 int cnt = 0;
816
817 if (ltime != 0)
818 {
819 timeouts[cnt].id = STANDBY_TIMEOUT;
820 timeouts[cnt].type = TMPARAM_AT;
821 timeouts[cnt].fin_time = ltime;
822 cnt++;
823 }
824
826 timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
827 timeouts[cnt].type = TMPARAM_AFTER;
828 timeouts[cnt].delay_ms = DeadlockTimeout;
829 cnt++;
830
831 enable_timeouts(timeouts, cnt);
832 }
833
834 /*
835 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
836 * by one of the timeouts established above.
837 *
838 * We assume that only UnpinBuffer() and the timeout requests established
839 * above can wake us up here. WakeupRecovery() called by walreceiver or
840 * SIGHUP signal handler, etc cannot do that because it uses a different
841 * latch from the one that ProcWaitForSignal() waits on.
842 */
843 ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
844
848 {
849 /*
850 * Send out a request for hot-standby backends to check themselves for
851 * deadlocks.
852 *
853 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
854 * to be signaled by UnpinBuffer() again and send a request for
855 * deadlocks check if deadlock_timeout happens. This causes the
856 * request to continue to be sent every deadlock_timeout until the
857 * buffer is unpinned or ltime is reached. This would increase the
858 * workload in the startup process and backends. In practice it may
859 * not be so harmful because the period that the buffer is kept pinned
860 * is basically not so long. But should we fix this?
861 */
863 }
864
865 /*
866 * Clear any timeout requests established above. We assume here that the
867 * Startup process doesn't have any other timeouts than what this function
868 * uses. If that stops being true, we could cancel the timeouts
869 * individually, but that'd be slower.
870 */
874}
875
876static void
878{
881
882 /*
883 * We send signal to all backends to ask them if they are holding the
884 * buffer pin which is delaying the Startup process. We must not set the
885 * conflict flag yet, since most backends will be innocent. Let the
886 * SIGUSR1 handling in each backend decide their own fate.
887 */
888 CancelDBBackends(InvalidOid, reason, false);
889}
890
891/*
892 * In Hot Standby perform early deadlock detection. We abort the lock
893 * wait if we are about to sleep while holding the buffer pin that Startup
894 * process is waiting for.
895 *
896 * Note: this code is pessimistic, because there is no way for it to
897 * determine whether an actual deadlock condition is present: the lock we
898 * need to wait for might be unrelated to any held by the Startup process.
899 * Sooner or later, this mechanism should get ripped out in favor of somehow
900 * accounting for buffer locks in DeadLockCheck(). However, errors here
901 * seem to be very low-probability in practice, so for now it's not worth
902 * the trouble.
903 */
904void
906{
907 Assert(!InRecovery); /* do not call in Startup process */
908
910 return;
911
912 /*
913 * Error message should match ProcessInterrupts() but we avoid calling
914 * that because we aren't handling an interrupt at this point. Note that
915 * we only cancel the current transaction here, so if we are in a
916 * subtransaction and the pin is held by a parent, then the Startup
917 * process will continue to wait even though we have avoided deadlock.
918 */
921 errmsg("canceling statement due to conflict with recovery"),
922 errdetail("User transaction caused buffer deadlock with recovery.")));
923}
924
925
926/* --------------------------------
927 * timeout handler routines
928 * --------------------------------
929 */
930
931/*
932 * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
933 * exceeded.
934 */
935void
937{
939}
940
941/*
942 * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
943 */
944void
946{
948}
949
950/*
951 * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
952 */
953void
955{
957}
958
959/*
960 * -----------------------------------------------------
961 * Locking in Recovery Mode
962 * -----------------------------------------------------
963 *
964 * All locks are held by the Startup process using a single virtual
965 * transaction. This implementation is both simpler and in some senses,
966 * more correct. The locks held mean "some original transaction held
967 * this lock, so query access is not allowed at this time". So the Startup
968 * process is the proxy by which the original locks are implemented.
969 *
970 * We only keep track of AccessExclusiveLocks, which are only ever held by
971 * one transaction on one relation.
972 *
973 * We keep a table of known locks in the RecoveryLockHash hash table.
974 * The point of that table is to let us efficiently de-duplicate locks,
975 * which is important because checkpoints will re-report the same locks
976 * already held. There is also a RecoveryLockXidHash table with one entry
977 * per xid, which allows us to efficiently find all the locks held by a
978 * given original transaction.
979 *
980 * We use session locks rather than normal locks so we don't need
981 * ResourceOwners.
982 */
983
984
985void
987{
988 RecoveryLockXidEntry *xidentry;
989 RecoveryLockEntry *lockentry;
991 LOCKTAG locktag;
992 bool found;
993
994 /* Already processed? */
995 if (!TransactionIdIsValid(xid) ||
998 return;
999
1000 elog(DEBUG4, "adding recovery lock: db %u rel %u", dbOid, relOid);
1001
1002 /* dbOid is InvalidOid when we are locking a shared relation. */
1003 Assert(OidIsValid(relOid));
1004
1005 /* Create a hash entry for this xid, if we don't have one already. */
1006 xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
1007 if (!found)
1008 {
1009 Assert(xidentry->xid == xid); /* dynahash should have set this */
1010 xidentry->head = NULL;
1011 }
1012
1013 /* Create a hash entry for this lock, unless we have one already. */
1014 key.xid = xid;
1015 key.dbOid = dbOid;
1016 key.relOid = relOid;
1017 lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1018 if (!found)
1019 {
1020 /* It's new, so link it into the XID's list ... */
1021 lockentry->next = xidentry->head;
1022 xidentry->head = lockentry;
1023
1024 /* ... and acquire the lock locally. */
1025 SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1026
1027 (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1028 }
1029}
1030
1031/*
1032 * Release all the locks associated with this RecoveryLockXidEntry.
1033 */
1034static void
1036{
1037 RecoveryLockEntry *entry;
1039
1040 for (entry = xidentry->head; entry != NULL; entry = next)
1041 {
1042 LOCKTAG locktag;
1043
1044 elog(DEBUG4,
1045 "releasing recovery lock: xid %u db %u rel %u",
1046 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1047 /* Release the lock ... */
1048 SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
1049 if (!LockRelease(&locktag, AccessExclusiveLock, true))
1050 {
1051 elog(LOG,
1052 "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1053 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1054 Assert(false);
1055 }
1056 /* ... and remove the per-lock hash entry */
1057 next = entry->next;
1059 }
1060
1061 xidentry->head = NULL; /* just for paranoia */
1062}
1063
1064/*
1065 * Release locks for a specific XID, or all locks if it's InvalidTransactionId.
1066 */
1067static void
1069{
1070 RecoveryLockXidEntry *entry;
1071
1072 if (TransactionIdIsValid(xid))
1073 {
1074 if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1075 {
1078 }
1079 }
1080 else
1082}
1083
1084/*
1085 * Release locks for a transaction tree, i.e. xid and each of its subxids,
1086 * from RecoveryLockXidHash.
1087 *
1088 * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1089 * to remove any AccessExclusiveLocks requested by a transaction.
1090 */
1091void
1093{
1094 int i;
1095
1097
1098 for (i = 0; i < nsubxids; i++)
1099 StandbyReleaseLocks(subxids[i]);
1100}
1101
1102/*
1103 * Called at end of recovery and when we see a shutdown checkpoint.
1104 */
1105void
1107{
1108 HASH_SEQ_STATUS status;
1109 RecoveryLockXidEntry *entry;
1110
1111 elog(DEBUG2, "release all standby locks");
1112
1114 while ((entry = hash_seq_search(&status)))
1115 {
1118 }
1119}
1120
1121/*
1122 * StandbyReleaseOldLocks
1123 * Release standby locks held by top-level XIDs that aren't running,
1124 * as long as they're not prepared transactions.
1125 *
1126 * This is needed to prune the locks of crashed transactions, which didn't
1127 * write an ABORT/COMMIT record.
1128 */
1129void
1131{
1132 HASH_SEQ_STATUS status;
1133 RecoveryLockXidEntry *entry;
1134
1136 while ((entry = hash_seq_search(&status)))
1137 {
1139
1140 /* Skip if prepared transaction. */
1142 continue;
1143
1144 /* Skip if >= oldxid. */
1145 if (!TransactionIdPrecedes(entry->xid, oldxid))
1146 continue;
1147
1148 /* Remove all locks and hash table entry. */
1151 }
1152}
1153
1154/*
1155 * --------------------------------------------------------------------
1156 * Recovery handling for Rmgr RM_STANDBY_ID
1157 *
1158 * These record types will only be created if XLogStandbyInfoActive()
1159 * --------------------------------------------------------------------
1160 */
1161
1162void
1164{
1165 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1166
1167 /* Backup blocks are not used in standby records */
1169
1170 /* Do nothing if we're not in hot standby mode */
1172 return;
1173
1174 if (info == XLOG_STANDBY_LOCK)
1175 {
1177 int i;
1178
1179 for (i = 0; i < xlrec->nlocks; i++)
1181 xlrec->locks[i].dbOid,
1182 xlrec->locks[i].relOid);
1183 }
1184 else if (info == XLOG_RUNNING_XACTS)
1185 {
1188
1189 running.xcnt = xlrec->xcnt;
1190 running.subxcnt = xlrec->subxcnt;
1192 running.nextXid = xlrec->nextXid;
1193 running.latestCompletedXid = xlrec->latestCompletedXid;
1194 running.oldestRunningXid = xlrec->oldestRunningXid;
1195 running.xids = xlrec->xids;
1196
1198
1199 /*
1200 * The startup process currently has no convenient way to schedule
1201 * stats to be reported. XLOG_RUNNING_XACTS records are issued at a
1202 * regular cadence, making this a convenient location to report stats.
1203 * While these records aren't generated with wal_level=minimal, stats
1204 * also cannot be accessed during WAL replay.
1205 */
1206 pgstat_report_stat(true);
1207 }
1208 else if (info == XLOG_INVALIDATIONS)
1209 {
1211
1213 xlrec->nmsgs,
1214 xlrec->relcacheInitFileInval,
1215 xlrec->dbId,
1216 xlrec->tsId);
1217 }
1218 else
1219 elog(PANIC, "standby_redo: unknown op code %u", info);
1220}
1221
1222/*
1223 * Log details of the current snapshot to WAL. This allows the snapshot state
1224 * to be reconstructed on the standby and for logical decoding.
1225 *
1226 * This is used for Hot Standby as follows:
1227 *
1228 * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1229 * start from a shutdown checkpoint because we know nothing was running
1230 * at that time and our recovery snapshot is known empty. In the more
1231 * typical case of an online checkpoint we need to jump through a few
1232 * hoops to get a correct recovery snapshot and this requires a two or
1233 * sometimes a three stage process.
1234 *
1235 * The initial snapshot must contain all running xids and all current
1236 * AccessExclusiveLocks at a point in time on the standby. Assembling
1237 * that information while the server is running requires many and
1238 * various LWLocks, so we choose to derive that information piece by
1239 * piece and then re-assemble that info on the standby. When that
1240 * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1241 *
1242 * Since locking on the primary when we derive the information is not
1243 * strict, we note that there is a time window between the derivation and
1244 * writing to WAL of the derived information. That allows race conditions
1245 * that we must resolve, since xids and locks may enter or leave the
1246 * snapshot during that window. This creates the issue that an xid or
1247 * lock may start *after* the snapshot has been derived yet *before* the
1248 * snapshot is logged in the running xacts WAL record. We resolve this by
1249 * starting to accumulate changes at a point just prior to when we derive
1250 * the snapshot on the primary, then ignore duplicates when we later apply
1251 * the snapshot from the running xacts record. This is implemented during
1252 * CreateCheckPoint() where we use the logical checkpoint location as
1253 * our starting point and then write the running xacts record immediately
1254 * before writing the main checkpoint WAL record. Since we always start
1255 * up from a checkpoint and are immediately at our starting point, we
1256 * unconditionally move to STANDBY_INITIALIZED. After this point we
1257 * must do 4 things:
1258 * * move shared nextXid forwards as we see new xids
1259 * * extend the clog and subtrans with each new xid
1260 * * keep track of uncommitted known assigned xids
1261 * * keep track of uncommitted AccessExclusiveLocks
1262 *
1263 * When we see a commit/abort we must remove known assigned xids and locks
1264 * from the completing transaction. Attempted removals that cannot locate
1265 * an entry are expected and must not cause an error when we are in state
1266 * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1267 * KnownAssignedXidsRemove().
1268 *
1269 * Later, when we apply the running xact data we must be careful to ignore
1270 * transactions already committed, since those commits raced ahead when
1271 * making WAL entries.
1272 *
1273 * For logical decoding only the running xacts information is needed;
1274 * there's no need to look at the locking information, but it's logged anyway,
1275 * as there's no independent knob to just enable logical decoding. For
1276 * details of how this is used, check snapbuild.c's introductory comment.
1277 *
1278 *
1279 * Returns the RecPtr of the last inserted record.
1280 */
1283{
1284 XLogRecPtr recptr;
1285 RunningTransactions running;
1286 xl_standby_lock *locks;
1287 int nlocks;
1288
1290
1291#ifdef USE_INJECTION_POINTS
1292 if (IS_INJECTION_POINT_ATTACHED("skip-log-running-xacts"))
1293 {
1294 /*
1295 * This record could move slot's xmin forward during decoding, leading
1296 * to unpredictable results, so skip it when requested by the test.
1297 */
1298 return GetInsertRecPtr();
1299 }
1300#endif
1301
1302 /*
1303 * Get details of any AccessExclusiveLocks being held at the moment.
1304 */
1305 locks = GetRunningTransactionLocks(&nlocks);
1306 if (nlocks > 0)
1307 LogAccessExclusiveLocks(nlocks, locks);
1308 pfree(locks);
1309
1310 /*
1311 * Log details of all in-progress transactions. This should be the last
1312 * record we write, because standby will open up when it sees this.
1313 */
1314 running = GetRunningTransactionData();
1315
1316 /*
1317 * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
1318 * For Hot Standby this can be done before inserting the WAL record
1319 * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
1320 * the clog. For logical decoding, though, the lock can't be released
1321 * early because the clog might be "in the future" from the POV of the
1322 * historic snapshot. This would allow for situations where we're waiting
1323 * for the end of a transaction listed in the xl_running_xacts record
1324 * which, according to the WAL, has committed before the xl_running_xacts
1325 * record. Fortunately this routine isn't executed frequently, and it's
1326 * only a shared lock.
1327 */
1329 LWLockRelease(ProcArrayLock);
1330
1331 recptr = LogCurrentRunningXacts(running);
1332
1333 /* Release lock if we kept it longer ... */
1335 LWLockRelease(ProcArrayLock);
1336
1337 /* GetRunningTransactionData() acquired XidGenLock, we must release it */
1338 LWLockRelease(XidGenLock);
1339
1340 return recptr;
1341}
1342
1343/*
1344 * Record an enhanced snapshot of running transactions into WAL.
1345 *
1346 * The definitions of RunningTransactionsData and xl_running_xacts are
1347 * similar. We keep them separate because xl_running_xacts is a contiguous
1348 * chunk of memory and never exists fully until it is assembled in WAL.
1349 * The inserted records are marked as not being important for durability,
1350 * to avoid triggering superfluous checkpoint / archiving activity.
1351 */
1352static XLogRecPtr
1354{
1355 xl_running_xacts xlrec;
1356 XLogRecPtr recptr;
1357
1358 xlrec.xcnt = CurrRunningXacts->xcnt;
1359 xlrec.subxcnt = CurrRunningXacts->subxcnt;
1360 xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
1361 xlrec.nextXid = CurrRunningXacts->nextXid;
1362 xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
1363 xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
1364
1365 /* Header */
1369
1370 /* array of TransactionIds */
1371 if (xlrec.xcnt > 0)
1372 XLogRegisterData(CurrRunningXacts->xids,
1373 (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
1374
1375 recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
1376
1377 if (xlrec.subxid_overflow)
1378 elog(DEBUG2,
1379 "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1380 CurrRunningXacts->xcnt,
1381 LSN_FORMAT_ARGS(recptr),
1382 CurrRunningXacts->oldestRunningXid,
1383 CurrRunningXacts->latestCompletedXid,
1384 CurrRunningXacts->nextXid);
1385 else
1386 elog(DEBUG2,
1387 "snapshot of %d+%d running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1388 CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
1389 LSN_FORMAT_ARGS(recptr),
1390 CurrRunningXacts->oldestRunningXid,
1391 CurrRunningXacts->latestCompletedXid,
1392 CurrRunningXacts->nextXid);
1393
1394 /*
1395 * Ensure running_xacts information is synced to disk not too far in the
1396 * future. We don't want to stall anything though (i.e. use XLogFlush()),
1397 * so we let the wal writer do it during normal operation.
1398 * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
1399 * and nudge the WALWriter into action if sleeping. Check
1400 * XLogBackgroundFlush() for details why a record might not be flushed
1401 * without it.
1402 */
1403 XLogSetAsyncXactLSN(recptr);
1404
1405 return recptr;
1406}
1407
1408/*
1409 * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1410 * logged, as described in backend/storage/lmgr/README.
1411 */
1412static void
1414{
1415 xl_standby_locks xlrec;
1416
1417 xlrec.nlocks = nlocks;
1418
1420 XLogRegisterData(&xlrec, offsetof(xl_standby_locks, locks));
1421 XLogRegisterData(locks, nlocks * sizeof(xl_standby_lock));
1423
1424 (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
1425}
1426
1427/*
1428 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
1429 */
1430void
1432{
1433 xl_standby_lock xlrec;
1434
1435 xlrec.xid = GetCurrentTransactionId();
1436
1437 xlrec.dbOid = dbOid;
1438 xlrec.relOid = relOid;
1439
1440 LogAccessExclusiveLocks(1, &xlrec);
1442}
1443
1444/*
1445 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
1446 */
1447void
1449{
1450 /*
1451 * Ensure that a TransactionId has been assigned to this transaction, for
1452 * two reasons, both related to lock release on the standby. First, we
1453 * must assign an xid so that RecordTransactionCommit() and
1454 * RecordTransactionAbort() do not optimise away the transaction
1455 * completion record which recovery relies upon to release locks. It's a
1456 * hack, but for a corner case not worth adding code for into the main
1457 * commit path. Second, we must assign an xid before the lock is recorded
1458 * in shared memory, otherwise a concurrently executing
1459 * GetRunningTransactionLocks() might see a lock associated with an
1460 * InvalidTransactionId which we later assert cannot happen.
1461 */
1462 (void) GetCurrentTransactionId();
1463}
1464
1465/*
1466 * Emit WAL for invalidations. This is currently used only for commits that
1467 * have no xid but do contain invalidations.
1468 */
1469void
1471 bool relcacheInitFileInval)
1472{
1473 xl_invalidations xlrec;
1474
1475 /* prepare record */
1476 memset(&xlrec, 0, sizeof(xlrec));
1477 xlrec.dbId = MyDatabaseId;
1478 xlrec.tsId = MyDatabaseTableSpace;
1479 xlrec.relcacheInitFileInval = relcacheInitFileInval;
1480 xlrec.nmsgs = nmsgs;
1481
1482 /* perform insertion */
1485 XLogRegisterData(msgs,
1486 nmsgs * sizeof(SharedInvalidationMessage));
1487 XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
1488}
1489
1490/* Return the description of recovery conflict */
1491static const char *
1493{
1494 const char *reasonDesc = _("unknown reason");
1495
1496 switch (reason)
1497 {
1499 reasonDesc = _("recovery conflict on buffer pin");
1500 break;
1502 reasonDesc = _("recovery conflict on lock");
1503 break;
1505 reasonDesc = _("recovery conflict on tablespace");
1506 break;
1508 reasonDesc = _("recovery conflict on snapshot");
1509 break;
1511 reasonDesc = _("recovery conflict on replication slot");
1512 break;
1514 reasonDesc = _("recovery conflict on buffer deadlock");
1515 break;
1517 reasonDesc = _("recovery conflict on database");
1518 break;
1519 default:
1520 break;
1521 }
1522
1523 return reasonDesc;
1524}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
static int32 next
Definition: blutils.c:224
static void cleanup(void)
Definition: bootstrap.c:713
bool HoldingBufferPinThatDelaysRecovery(void)
Definition: bufmgr.c:5759
uint8_t uint8
Definition: c.h:500
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
uint32 TransactionId
Definition: c.h:623
#define OidIsValid(objectId)
Definition: c.h:746
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errcode(int sqlerrcode)
Definition: elog.c:854
int errdetail_log_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1273
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:149
#define DEBUG4
Definition: elog.h:27
ProcNumber MyProcNumber
Definition: globals.c:91
Oid MyDatabaseTableSpace
Definition: globals.c:97
Oid MyDatabaseId
Definition: globals.c:95
Assert(PointerIsAligned(start, uint64))
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define IS_INJECTION_POINT_ATTACHED(name)
void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid)
Definition: inval.c:1128
int i
Definition: isn.c:77
LockAcquireResult LockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait)
Definition: lock.c:805
void VirtualXactLockTableInsert(VirtualTransactionId vxid)
Definition: lock.c:4587
bool LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
Definition: lock.c:2067
void VirtualXactLockTableCleanup(void)
Definition: lock.c:4610
bool VirtualXactLock(VirtualTransactionId vxid, bool wait)
Definition: lock.c:4710
VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
Definition: lock.c:3034
xl_standby_lock * GetRunningTransactionLocks(int *nlocks)
Definition: lock.c:4138
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:68
#define SET_LOCKTAG_RELATION(locktag, dboid, reloid)
Definition: lock.h:182
#define AccessExclusiveLock
Definition: lockdefs.h:43
struct xl_standby_lock xl_standby_lock
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
void pfree(void *pointer)
Definition: mcxt.c:2147
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
static char * buf
Definition: pg_test_fsync.c:72
#define ERRCODE_T_R_DEADLOCK_DETECTED
Definition: pgbench.c:78
long pgstat_report_stat(bool force)
Definition: pgstat.c:691
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3497
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4498
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2689
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3659
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3491
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3599
PGPROC * ProcNumberGetProc(ProcNumber procNumber)
Definition: procarray.c:3138
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3417
ProcSignalReason
Definition: procsignal.h:31
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:48
@ PROCSIG_RECOVERY_CONFLICT_LOCK
Definition: procsignal.h:45
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:47
@ PROCSIG_RECOVERY_CONFLICT_DATABASE
Definition: procsignal.h:43
@ PROCSIG_RECOVERY_CONFLICT_SNAPSHOT
Definition: procsignal.h:46
@ PROCSIG_RECOVERY_CONFLICT_TABLESPACE
Definition: procsignal.h:44
@ PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK
Definition: procsignal.h:49
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:423
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:371
bool update_process_title
Definition: ps_status.c:31
void pg_usleep(long microsec)
Definition: signal.c:53
void SharedInvalBackendInit(bool sendOnly)
Definition: sinvaladt.c:271
LocalTransactionId GetNextLocalTransactionId(void)
Definition: sinvaladt.c:700
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1976
@ RS_INVAL_HORIZON
Definition: slot.h:57
PGPROC * MyProc
Definition: proc.c:67
int DeadlockTimeout
Definition: proc.c:58
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1975
void standby_redo(XLogReaderState *record)
Definition: standby.c:1163
void ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition: standby.c:512
static bool WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
Definition: standby.c:234
static volatile sig_atomic_t got_standby_deadlock_timeout
Definition: standby.c:69
static TimestampTz GetStandbyLimitTime(void)
Definition: standby.c:201
void StandbyTimeoutHandler(void)
Definition: standby.c:945
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:793
static volatile sig_atomic_t got_standby_delay_timeout
Definition: standby.c:70
void StandbyLockTimeoutHandler(void)
Definition: standby.c:954
static int standbyWait_us
Definition: standby.c:226
static void StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
Definition: standby.c:1035
void StandbyDeadLockHandler(void)
Definition: standby.c:936
static HTAB * RecoveryLockXidHash
Definition: standby.c:66
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1282
struct RecoveryLockEntry RecoveryLockEntry
void CheckRecoveryConflictDeadlock(void)
Definition: standby.c:905
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:95
void ResolveRecoveryConflictWithTablespace(Oid tsid)
Definition: standby.c:539
static const char * get_recovery_conflict_desc(ProcSignalReason reason)
Definition: standby.c:1492
bool log_recovery_conflict_waits
Definition: standby.c:42
#define STANDBY_INITIAL_WAIT_US
Definition: standby.c:225
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ProcSignalReason reason, uint32 wait_event_info, bool report_waiting)
Definition: standby.c:360
static volatile sig_atomic_t got_standby_lock_timeout
Definition: standby.c:71
void ResolveRecoveryConflictWithDatabase(Oid dbid)
Definition: standby.c:569
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: standby.c:1092
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1130
struct RecoveryLockXidEntry RecoveryLockXidEntry
void ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition: standby.c:468
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
Definition: standby.c:877
void LogAccessExclusiveLockPrepare(void)
Definition: standby.c:1448
static HTAB * RecoveryLockHash
Definition: standby.c:65
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
Definition: standby.c:1413
void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInitFileInval)
Definition: standby.c:1470
void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
Definition: standby.c:986
static void StandbyReleaseLocks(TransactionId xid)
Definition: standby.c:1068
void LogAccessExclusiveLock(Oid dbOid, Oid relOid)
Definition: standby.c:1431
int max_standby_archive_delay
Definition: standby.c:40
void StandbyReleaseAllLocks(void)
Definition: standby.c:1106
int max_standby_streaming_delay
Definition: standby.c:41
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
Definition: standby.c:1353
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
Definition: standby.c:623
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:161
#define MinSizeOfXactRunningXacts
Definition: standby.h:63
@ SUBXIDS_MISSING
Definition: standby.h:81
@ SUBXIDS_IN_ARRAY
Definition: standby.h:80
#define XLOG_INVALIDATIONS
Definition: standbydefs.h:36
#define MinSizeOfInvalidations
Definition: standbydefs.h:72
#define XLOG_STANDBY_LOCK
Definition: standbydefs.h:34
#define XLOG_RUNNING_XACTS
Definition: standbydefs.h:35
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
TimeoutType type
Definition: timeout.h:61
TimestampTz fin_time
Definition: timeout.h:63
TimeoutId id
Definition: timeout.h:60
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
Definition: lock.h:166
uint8 locktag_type
Definition: lock.h:171
Definition: proc.h:163
struct PGPROC::@127 vxid
pg_atomic_uint64 waitStart
Definition: proc.h:238
ProcNumber procNumber
Definition: proc.h:196
int pid
Definition: proc.h:183
struct RecoveryLockEntry * next
Definition: standby.c:56
xl_standby_lock key
Definition: standby.c:55
TransactionId xid
Definition: standby.c:61
struct RecoveryLockEntry * head
Definition: standby.c:62
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
LocalTransactionId localTransactionId
Definition: lock.h:63
ProcNumber procNumber
Definition: lock.h:62
SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:69
bool relcacheInitFileInval
Definition: standbydefs.h:67
TransactionId latestCompletedXid
Definition: standbydefs.h:54
TransactionId oldestRunningXid
Definition: standbydefs.h:53
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
TransactionId nextXid
Definition: standbydefs.h:52
TransactionId xid
Definition: lockdefs.h:53
xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:41
void disable_all_timeouts(bool keep_indicators)
Definition: timeout.c:751
void enable_timeouts(const EnableTimeoutParams *timeouts, int count)
Definition: timeout.c:630
@ STANDBY_LOCK_TIMEOUT
Definition: timeout.h:32
@ STANDBY_DEADLOCK_TIMEOUT
Definition: timeout.h:30
@ STANDBY_TIMEOUT
Definition: timeout.h:31
@ TMPARAM_AT
Definition: timeout.h:54
@ TMPARAM_AFTER
Definition: timeout.h:53
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188
#define InvalidTransactionId
Definition: transam.h:31
#define U64FromFullTransactionId(x)
Definition: transam.h:49
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define MaxTransactionId
Definition: transam.h:35
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1459
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:85
FullTransactionId ReadNextFullTransactionId(void)
Definition: varsup.c:288
#define PG_WAIT_LOCK
Definition: wait_event.h:19
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
static volatile sig_atomic_t waiting
Definition: waiteventset.c:170
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:454
int MyXactFlags
Definition: xact.c:136
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition: xact.h:108
XLogRecPtr GetInsertRecPtr(void)
Definition: xlog.c:6670
int wal_level
Definition: xlog.c:131
void XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
Definition: xlog.c:2752
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60