PostgreSQL Source Code git master
Loading...
Searching...
No Matches
standby.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * standby.c
4 * Misc functions used in Hot Standby mode.
5 *
6 * All functions for handling RM_STANDBY_ID, which relate to
7 * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8 * Plus conflict recovery processing.
9 *
10 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 * IDENTIFICATION
14 * src/backend/storage/ipc/standby.c
15 *
16 *-------------------------------------------------------------------------
17 */
18#include "postgres.h"
19#include "access/transam.h"
20#include "access/twophase.h"
21#include "access/xact.h"
22#include "access/xloginsert.h"
23#include "access/xlogrecovery.h"
24#include "access/xlogutils.h"
25#include "miscadmin.h"
26#include "pgstat.h"
27#include "replication/slot.h"
28#include "storage/bufmgr.h"
29#include "storage/proc.h"
30#include "storage/procarray.h"
31#include "storage/sinvaladt.h"
32#include "storage/standby.h"
33#include "utils/hsearch.h"
35#include "utils/ps_status.h"
36#include "utils/timeout.h"
37#include "utils/timestamp.h"
38#include "utils/wait_event.h"
39
40/* User-settable GUC parameters */
44
45/*
46 * Keep track of all the exclusive locks owned by original transactions.
47 * For each known exclusive lock, there is a RecoveryLockEntry in the
48 * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
49 * given XID are chained together so that we can find them easily.
50 * For each original transaction that is known to have any such locks,
51 * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
52 * which stores the head of the chain of its locks.
53 */
54typedef struct RecoveryLockEntry
55{
56 xl_standby_lock key; /* hash key: xid, dbOid, relOid */
57 struct RecoveryLockEntry *next; /* chain link */
59
61{
62 TransactionId xid; /* hash key -- must be first */
63 struct RecoveryLockEntry *head; /* chain head */
65
68
69/* Flags set by timeout handlers */
72static volatile sig_atomic_t got_standby_lock_timeout = false;
73
76 uint32 wait_event_info,
77 bool report_waiting);
80static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
81static const char *get_recovery_conflict_desc(RecoveryConflictReason reason);
82
83/*
84 * InitRecoveryTransactionEnvironment
85 * Initialize tracking of our primary's in-progress transactions.
86 *
87 * We need to issue shared invalidations and hold locks. Holding locks
88 * means others may want to wait on us, so we need to make a lock table
89 * vxact entry like a real transaction. We could create and delete
90 * lock table entries for each transaction but it's simpler just to create
91 * one permanent entry and leave it there all the time. Locks are then
92 * acquired and released as needed. Yes, this means you can see the
93 * Startup process in pg_locks once we have run this.
94 */
95void
97{
100
101 Assert(RecoveryLockHash == NULL); /* don't run this twice */
102
103 /*
104 * Initialize the hash tables for tracking the locks held by each
105 * transaction.
106 */
107 hash_ctl.keysize = sizeof(xl_standby_lock);
108 hash_ctl.entrysize = sizeof(RecoveryLockEntry);
109 RecoveryLockHash = hash_create("RecoveryLockHash",
110 64,
111 &hash_ctl,
113 hash_ctl.keysize = sizeof(TransactionId);
114 hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
115 RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
116 64,
117 &hash_ctl,
119
120 /*
121 * Initialize shared invalidation management for Startup process, being
122 * careful to register ourselves as a sendOnly process so we don't need to
123 * read messages, nor will we get signaled when the queue starts filling
124 * up.
125 */
127
128 /*
129 * Lock a virtual transaction id for Startup process.
130 *
131 * We need to do GetNextLocalTransactionId() because
132 * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
133 * manager doesn't like that at all.
134 *
135 * Note that we don't need to run XactLockTableInsert() because nobody
136 * needs to wait on xids. That sounds a little strange, but table locks
137 * are held by vxids and row level locks are held by xids. All queries
138 * hold AccessShareLocks so never block while we write or lock new rows.
139 */
144
146}
147
148/*
149 * ShutdownRecoveryTransactionEnvironment
150 * Shut down transaction tracking
151 *
152 * Prepare to switch from hot standby mode to normal operation. Shut down
153 * recovery-time transaction tracking.
154 *
155 * This must be called even in shutdown of startup process if transaction
156 * tracking has been initialized. Otherwise some locks the tracked
157 * transactions were holding will not be released and may interfere with
158 * the processes still running (though they will exit soon) at the exit of
159 * startup process.
160 */
161void
163{
164 /*
165 * Do nothing if RecoveryLockHash is NULL because that means that
166 * transaction tracking has not yet been initialized or has already been
167 * shut down. This makes it safe to have possibly-redundant calls of this
168 * function during process exit.
169 */
170 if (RecoveryLockHash == NULL)
171 return;
172
173 /* Mark all tracked in-progress transactions as finished. */
175
176 /* Release all locks the tracked transactions were holding */
178
179 /* Destroy the lock hash tables. */
184
185 /* Cleanup our VirtualTransaction */
187}
188
189
190/*
191 * -----------------------------------------------------
192 * Standby wait timers and backend cancel logic
193 * -----------------------------------------------------
194 */
195
196/*
197 * Determine the cutoff time at which we want to start canceling conflicting
198 * transactions. Returns zero (a time safely in the past) if we are willing
199 * to wait forever.
200 */
201static TimestampTz
203{
205 bool fromStream;
206
207 /*
208 * The cutoff time is the last WAL data receipt time plus the appropriate
209 * delay variable. Delay of -1 means wait forever.
210 */
212 if (fromStream)
213 {
215 return 0; /* wait forever */
217 }
218 else
219 {
221 return 0; /* wait forever */
223 }
224}
225
226#define STANDBY_INITIAL_WAIT_US 1000
228
229/*
230 * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
231 * We wait here for a while then return. If we decide we can't wait any
232 * more then we return true, if we can wait some more return false.
233 */
234static bool
236{
238
240
241 /* Are we past the limit time? */
243 if (ltime && GetCurrentTimestamp() >= ltime)
244 return true;
245
246 /*
247 * Sleep a bit (this is essential to avoid busy-waiting).
248 */
249 pgstat_report_wait_start(wait_event_info);
252
253 /*
254 * Progressively increase the sleep times, but not to more than 1s, since
255 * pg_usleep isn't interruptible on some platforms.
256 */
257 standbyWait_us *= 2;
258 if (standbyWait_us > 1000000)
259 standbyWait_us = 1000000;
260
261 return false;
262}
263
264/*
265 * Log the recovery conflict.
266 *
267 * wait_start is the timestamp when the caller started to wait.
268 * now is the timestamp when this function has been called.
269 * wait_list is the list of virtual transaction ids assigned to
270 * conflicting processes. still_waiting indicates whether
271 * the startup process is still waiting for the recovery conflict
272 * to be resolved or not.
273 */
274void
277 bool still_waiting)
278{
279 long secs;
280 int usecs;
281 long msecs;
283 int nprocs = 0;
284
285 /*
286 * There must be no conflicting processes when the recovery conflict has
287 * already been resolved.
288 */
290
292 msecs = secs * 1000 + usecs / 1000;
293 usecs = usecs % 1000;
294
295 if (wait_list)
296 {
298
299 /* Construct a string listing the conflicting processes */
302 {
303 PGPROC *proc = ProcNumberGetProc(vxids->procNumber);
304
305 /* proc can be NULL if the target backend is not active */
306 if (proc)
307 {
308 if (nprocs == 0)
309 {
311 appendStringInfo(&buf, "%d", proc->pid);
312 }
313 else
314 appendStringInfo(&buf, ", %d", proc->pid);
315
316 nprocs++;
317 }
318
319 vxids++;
320 }
321 }
322
323 /*
324 * If wait_list is specified, report the list of PIDs of active
325 * conflicting backends in a detail message. Note that if all the backends
326 * in the list are not active, no detail message is logged.
327 */
328 if (still_waiting)
329 {
330 ereport(LOG,
331 errmsg("recovery still waiting after %ld.%03d ms: %s",
333 nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
334 "Conflicting processes: %s.",
335 nprocs, buf.data) : 0);
336 }
337 else
338 {
339 ereport(LOG,
340 errmsg("recovery finished waiting after %ld.%03d ms: %s",
342 }
343
344 if (nprocs > 0)
345 pfree(buf.data);
346}
347
348/*
349 * This is the main executioner for any query backend that conflicts with
350 * recovery processing. Judgement has already been passed on it within
351 * a specific rmgr. Here we just issue the orders to the procs. The procs
352 * then throw the required error as instructed.
353 *
354 * If report_waiting is true, "waiting" is reported in PS display and the
355 * wait for recovery conflict is reported in the log, if necessary. If
356 * the caller is responsible for reporting them, report_waiting should be
357 * false. Otherwise, both the caller and this function report the same
358 * thing unexpectedly.
359 */
360static void
363 uint32 wait_event_info,
364 bool report_waiting)
365{
366 TimestampTz waitStart = 0;
367 bool waiting = false;
368 bool logged_recovery_conflict = false;
369
370 /* Fast exit, to avoid a kernel call if there's no work to be done. */
372 return;
373
374 /* Set the wait start timestamp for reporting */
376 waitStart = GetCurrentTimestamp();
377
379 {
380 /* reset standbyWait_us for each xact we wait for */
382
383 /* wait until the virtual xid is gone */
384 while (!VirtualXactLock(*waitlist, false))
385 {
386 /* Is it time to kill it? */
387 if (WaitExceedsMaxStandbyDelay(wait_event_info))
388 {
389 bool signaled;
390
391 /*
392 * Now find out who to throw out of the balloon.
393 */
396
397 /*
398 * Wait a little bit for it to die so that we avoid flooding
399 * an unresponsive backend when system is heavily loaded.
400 */
401 if (signaled)
402 pg_usleep(5000L);
403 }
404
405 if (waitStart != 0 && (!logged_recovery_conflict || !waiting))
406 {
407 TimestampTz now = 0;
410
413
414 /* Get the current timestamp if not reported yet */
417
418 /*
419 * Report via ps if we have been waiting for more than 500
420 * msec (should that be configurable?)
421 */
422 if (maybe_update_title &&
423 TimestampDifferenceExceeds(waitStart, now, 500))
424 {
425 set_ps_display_suffix("waiting");
426 waiting = true;
427 }
428
429 /*
430 * Emit the log message if the startup process is waiting
431 * longer than deadlock_timeout for recovery conflict.
432 */
433 if (maybe_log_conflict &&
435 {
436 LogRecoveryConflict(reason, waitStart, now, waitlist, true);
438 }
439 }
440 }
441
442 /* The virtual transaction is gone now, wait for the next one */
443 waitlist++;
444 }
445
446 /*
447 * Emit the log message if recovery conflict was resolved but the startup
448 * process waited longer than deadlock_timeout for it.
449 */
451 LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
452 NULL, false);
453
454 /* reset ps display to remove the suffix if we added one */
455 if (waiting)
457
458}
459
460/*
461 * Generate whatever recovery conflicts are needed to eliminate snapshots that
462 * might see XIDs <= snapshotConflictHorizon as still running.
463 *
464 * snapshotConflictHorizon cutoffs are our standard approach to generating
465 * granular recovery conflicts. Note that InvalidTransactionId values are
466 * interpreted as "definitely don't need any conflicts" here, which is a
467 * general convention that WAL records can (and often do) depend on.
468 */
469void
471 bool isCatalogRel,
472 RelFileLocator locator)
473{
475
476 /*
477 * If we get passed InvalidTransactionId then we do nothing (no conflict).
478 *
479 * This can happen when replaying already-applied WAL records after a
480 * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
481 * record that marks as frozen a page which was already all-visible. It's
482 * also quite common with records generated during index deletion
483 * (original execution of the deletion can reason that a recovery conflict
484 * which is sufficient for the deletion operation must take place before
485 * replay of the deletion record itself).
486 */
487 if (!TransactionIdIsValid(snapshotConflictHorizon))
488 return;
489
490 Assert(TransactionIdIsNormal(snapshotConflictHorizon));
491 backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
492 locator.dbOid);
496 true);
497
498 /*
499 * Note that WaitExceedsMaxStandbyDelay() is not taken into account here
500 * (as opposed to ResolveRecoveryConflictWithVirtualXIDs() above). That
501 * seems OK, given that this kind of conflict should not normally be
502 * reached, e.g. due to using a physical replication slot.
503 */
504 if (IsLogicalDecodingEnabled() && isCatalogRel)
506 snapshotConflictHorizon);
507}
508
509/*
510 * Variant of ResolveRecoveryConflictWithSnapshot that works with
511 * FullTransactionId values
512 */
513void
515 bool isCatalogRel,
516 RelFileLocator locator)
517{
518 /*
519 * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
520 * so truncate the logged FullTransactionId. If the logged value is very
521 * old, so that XID wrap-around already happened on it, there can't be any
522 * snapshots that still see it.
523 */
525 uint64 diff;
526
527 diff = U64FromFullTransactionId(nextXid) -
528 U64FromFullTransactionId(snapshotConflictHorizon);
529 if (diff < MaxTransactionId / 2)
530 {
531 TransactionId truncated;
532
533 truncated = XidFromFullTransactionId(snapshotConflictHorizon);
535 isCatalogRel,
536 locator);
537 }
538}
539
540void
542{
544
545 /*
546 * Standby users may be currently using this tablespace for their
547 * temporary files. We only care about current users because
548 * temp_tablespace parameter will just ignore tablespaces that no longer
549 * exist.
550 *
551 * Ask everybody to cancel their queries immediately so we can ensure no
552 * temp files remain and we can remove the tablespace. Nuke the entire
553 * site from orbit, it's the only way to be sure.
554 *
555 * XXX: We could work out the pids of active backends using this
556 * tablespace by examining the temp filenames in the directory. We would
557 * then convert the pids into VirtualXIDs before attempting to cancel
558 * them.
559 *
560 * We don't wait for commit because drop tablespace is non-transactional.
561 */
563 InvalidOid);
567 true);
568}
569
570void
572{
573 /*
574 * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
575 * only waits for transactions and completely idle sessions would block
576 * us. This is rare enough that we do this as simply as possible: no wait,
577 * just force them off immediately.
578 *
579 * No locking is required here because we already acquired
580 * AccessExclusiveLock. Anybody trying to connect while we do this will
581 * block during InitPostgres() and then disconnect when they see the
582 * database has been removed.
583 */
584 while (CountDBBackends(dbid) > 0)
585 {
587
588 /*
589 * Wait awhile for them to die so that we avoid flooding an
590 * unresponsive backend when system is heavily loaded.
591 */
592 pg_usleep(10000);
593 }
594}
595
596/*
597 * ResolveRecoveryConflictWithLock is called from ProcSleep()
598 * to resolve conflicts with other backends holding relation locks.
599 *
600 * The WaitLatch sleep normally done in ProcSleep()
601 * (when not InHotStandby) is performed here, for code clarity.
602 *
603 * We either resolve conflicts immediately or set a timeout to wake us at
604 * the limit of our patience.
605 *
606 * Resolve conflicts by canceling all backends holding a conflicting
607 * lock. As we are already queued to be granted the lock, no new lock
608 * requests conflicting with ours will be granted in the meantime.
609 *
610 * We also must check for deadlocks involving the Startup process and
611 * hot-standby backend processes. If deadlock_timeout is reached in
612 * this function, all the backends holding the conflicting locks are
613 * requested to check themselves for deadlocks.
614 *
615 * logging_conflict should be true if the recovery conflict has not been
616 * logged yet even though logging is enabled. After deadlock_timeout is
617 * reached and the request for deadlock check is sent, we wait again to
618 * be signaled by the release of the lock if logging_conflict is false.
619 * Otherwise we return without waiting again so that the caller can report
620 * the recovery conflict. In this case, then, this function is called again
621 * with logging_conflict=false (because the recovery conflict has already
622 * been logged) and we will wait again for the lock to be released.
623 */
624void
626{
629
631
634
635 /*
636 * Update waitStart if first time through after the startup process
637 * started waiting for the lock. It should not be updated every time
638 * ResolveRecoveryConflictWithLock() is called during the wait.
639 *
640 * Use the current time obtained for comparison with ltime as waitStart
641 * (i.e., the time when this process started waiting for the lock). Since
642 * getting the current time newly can cause overhead, we reuse the
643 * already-obtained time to avoid that overhead.
644 *
645 * Note that waitStart is updated without holding the lock table's
646 * partition lock, to avoid the overhead by additional lock acquisition.
647 * This can cause "waitstart" in pg_locks to become NULL for a very short
648 * period of time after the wait started even though "granted" is false.
649 * This is OK in practice because we can assume that users are likely to
650 * look at "waitstart" when waiting for the lock for a long time.
651 */
654
655 if (now >= ltime && ltime != 0)
656 {
657 /*
658 * We're already behind, so clear a path as quickly as possible.
659 */
661
663
664 /*
665 * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
666 * "waiting" in PS display by disabling its argument report_waiting
667 * because the caller, WaitOnLock(), has already reported that.
668 */
671 PG_WAIT_LOCK | locktag.locktag_type,
672 false);
673 }
674 else
675 {
676 /*
677 * Wait (or wait again) until ltime, and check for deadlocks as well
678 * if we will be waiting longer than deadlock_timeout
679 */
681 int cnt = 0;
682
683 if (ltime != 0)
684 {
687 timeouts[cnt].type = TMPARAM_AT;
688 timeouts[cnt].fin_time = ltime;
689 cnt++;
690 }
691
694 timeouts[cnt].type = TMPARAM_AFTER;
695 timeouts[cnt].delay_ms = DeadlockTimeout;
696 cnt++;
697
699 }
700
701 /* Wait to be signaled by the release of the Relation Lock */
703
704 /*
705 * Exit if ltime is reached. Then all the backends holding conflicting
706 * locks will be canceled in the next ResolveRecoveryConflictWithLock()
707 * call.
708 */
710 goto cleanup;
711
713 {
715
717
718 /* Quick exit if there's no work to be done */
720 goto cleanup;
721
722 /*
723 * Send signals to all the backends holding the conflicting locks, to
724 * ask them to check themselves for deadlocks.
725 */
727 {
730 backends++;
731 }
732
733 /*
734 * Exit if the recovery conflict has not been logged yet even though
735 * logging is enabled, so that the caller can log that. Then
736 * ResolveRecoveryConflictWithLock() is called again and we will wait again
737 * for the lock to be released.
738 */
740 goto cleanup;
741
742 /*
743 * Wait again here to be signaled by the release of the Relation Lock,
744 * to prevent the subsequent ResolveRecoveryConflictWithLock() from causing
745 * deadlock_timeout and sending a request for deadlocks check again.
746 * Otherwise the request continues to be sent every deadlock_timeout
747 * until the relation locks are released or ltime is reached.
748 */
751 }
752
753cleanup:
754
755 /*
756 * Clear any timeout requests established above. We assume here that the
757 * Startup process doesn't have any other outstanding timeouts than those
758 * used by this function. If that stops being true, we could cancel the
759 * timeouts individually, but that'd be slower.
760 */
764}
765
766/*
767 * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
768 * to resolve conflicts with other backends holding buffer pins.
769 *
770 * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
771 * (when not InHotStandby) is performed here, for code clarity.
772 *
773 * We either resolve conflicts immediately or set a timeout to wake us at
774 * the limit of our patience.
775 *
776 * Resolve conflicts by sending a PROCSIG signal to all backends to check if
777 * they hold one of the buffer pins that is blocking Startup process. If so,
778 * those backends will take an appropriate error action, ERROR or FATAL.
779 *
780 * We also must check for deadlocks. Deadlocks occur because if queries
781 * wait on a lock, that must be behind an AccessExclusiveLock, which can only
782 * be cleared if the Startup process replays a transaction completion record.
783 * If Startup process is also waiting then that is a deadlock. The deadlock
784 * can occur if the query is waiting and then the Startup sleeps, or if
785 * Startup is sleeping and the query waits on a lock. We protect against
786 * only the former sequence here, the latter sequence is checked prior to
787 * the query sleeping, in CheckRecoveryConflictDeadlock().
788 *
789 * Deadlocks are extremely rare, and relatively expensive to check for,
790 * so we don't do a deadlock check right away ... only if we have had to wait
791 * at least deadlock_timeout.
792 */
793void
795{
797
799
801
802 if (GetCurrentTimestamp() >= ltime && ltime != 0)
803 {
804 /*
805 * We're already behind, so clear a path as quickly as possible.
806 */
808 }
809 else
810 {
811 /*
812 * Wake up at ltime, and check for deadlocks as well if we will be
813 * waiting longer than deadlock_timeout
814 */
816 int cnt = 0;
817
818 if (ltime != 0)
819 {
821 timeouts[cnt].type = TMPARAM_AT;
822 timeouts[cnt].fin_time = ltime;
823 cnt++;
824 }
825
828 timeouts[cnt].type = TMPARAM_AFTER;
829 timeouts[cnt].delay_ms = DeadlockTimeout;
830 cnt++;
831
833 }
834
835 /*
836 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
837 * by one of the timeouts established above.
838 *
839 * We assume that only UnpinBuffer() and the timeout requests established
840 * above can wake us up here. WakeupRecovery() called by walreceiver or
841 * SIGHUP signal handler, etc. cannot do that because it uses a different
842 * latch from the one that ProcWaitForSignal() waits on.
843 */
845
849 {
850 /*
851 * Send out a request for hot-standby backends to check themselves for
852 * deadlocks.
853 *
854 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
855 * to be signaled by UnpinBuffer() again and send a request for
856 * deadlocks check if deadlock_timeout happens. This causes the
857 * request to continue to be sent every deadlock_timeout until the
858 * buffer is unpinned or ltime is reached. This would increase the
859 * workload in the startup process and backends. In practice it may
860 * not be so harmful because the period that the buffer is kept pinned
861 * is basically not so long. But we should fix this?
862 */
864 }
865
866 /*
867 * Clear any timeout requests established above. We assume here that the
868 * Startup process doesn't have any other timeouts than what this function
869 * uses. If that stops being true, we could cancel the timeouts
870 * individually, but that'd be slower.
871 */
875}
876
877static void
879{
882
883 /*
884 * We send a signal to all backends to ask them if they are holding the
885 * buffer pin which is delaying the Startup process. Most of them will be
886 * innocent, but we let the SIGUSR1 handling in each backend decide their
887 * own fate.
888 */
890}
891
892/*
893 * In Hot Standby perform early deadlock detection. We abort the lock
894 * wait if we are about to sleep while holding the buffer pin that Startup
895 * process is waiting for.
896 *
897 * Note: this code is pessimistic, because there is no way for it to
898 * determine whether an actual deadlock condition is present: the lock we
899 * need to wait for might be unrelated to any held by the Startup process.
900 * Sooner or later, this mechanism should get ripped out in favor of somehow
901 * accounting for buffer locks in DeadLockCheck(). However, errors here
902 * seem to be very low-probability in practice, so for now it's not worth
903 * the trouble.
904 */
905void
907{
908 Assert(!InRecovery); /* do not call in Startup process */
909
911 return;
912
913 /*
914 * Error message should match ProcessInterrupts() but we avoid calling
915 * that because we aren't handling an interrupt at this point. Note that
916 * we only cancel the current transaction here, so if we are in a
917 * subtransaction and the pin is held by a parent, then the Startup
918 * process will continue to wait even though we have avoided deadlock.
919 */
922 errmsg("canceling statement due to conflict with recovery"),
923 errdetail("User transaction caused buffer deadlock with recovery.")));
924}
925
926
927/* --------------------------------
928 * timeout handler routines
929 * --------------------------------
930 */
931
932/*
933 * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
934 * exceeded.
935 */
936void
941
942/*
943 * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
944 */
945void
950
951/*
952 * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
953 */
954void
959
960/*
961 * -----------------------------------------------------
962 * Locking in Recovery Mode
963 * -----------------------------------------------------
964 *
965 * All locks are held by the Startup process using a single virtual
966 * transaction. This implementation is both simpler and in some senses,
967 * more correct. The locks held mean "some original transaction held
968 * this lock, so query access is not allowed at this time". So the Startup
969 * process is the proxy by which the original locks are implemented.
970 *
971 * We only keep track of AccessExclusiveLocks, which are only ever held by
972 * one transaction on one relation.
973 *
974 * We keep a table of known locks in the RecoveryLockHash hash table.
975 * The point of that table is to let us efficiently de-duplicate locks,
976 * which is important because checkpoints will re-report the same locks
977 * already held. There is also a RecoveryLockXidHash table with one entry
978 * per xid, which allows us to efficiently find all the locks held by a
979 * given original transaction.
980 *
981 * We use session locks rather than normal locks so we don't need
982 * ResourceOwners.
983 */
984
985
986void
988{
992 LOCKTAG locktag;
993 bool found;
994
995 /* Already processed? */
996 if (!TransactionIdIsValid(xid) ||
999 return;
1000
1001 elog(DEBUG4, "adding recovery lock: db %u rel %u", dbOid, relOid);
1002
1003 /* dbOid is InvalidOid when we are locking a shared relation. */
1004 Assert(OidIsValid(relOid));
1005
1006 /* Create a hash entry for this xid, if we don't have one already. */
1008 if (!found)
1009 {
1010 Assert(xidentry->xid == xid); /* dynahash should have set this */
1011 xidentry->head = NULL;
1012 }
1013
1014 /* Create a hash entry for this lock, unless we have one already. */
1015 key.xid = xid;
1016 key.dbOid = dbOid;
1017 key.relOid = relOid;
1019 if (!found)
1020 {
1021 /* It's new, so link it into the XID's list ... */
1022 lockentry->next = xidentry->head;
1023 xidentry->head = lockentry;
1024
1025 /* ... and acquire the lock locally. */
1026 SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1027
1028 (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1029 }
1030}
1031
1032/*
1033 * Release all the locks associated with this RecoveryLockXidEntry.
1034 */
1035static void
1037{
1038 RecoveryLockEntry *entry;
1040
1041 for (entry = xidentry->head; entry != NULL; entry = next)
1042 {
1043 LOCKTAG locktag;
1044
1045 elog(DEBUG4,
1046 "releasing recovery lock: xid %u db %u rel %u",
1047 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1048 /* Release the lock ... */
1049 SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
1050 if (!LockRelease(&locktag, AccessExclusiveLock, true))
1051 {
1052 elog(LOG,
1053 "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1054 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1055 Assert(false);
1056 }
1057 /* ... and remove the per-lock hash entry */
1058 next = entry->next;
1060 }
1061
1062 xidentry->head = NULL; /* just for paranoia */
1063}
1064
1065/*
1066 * Release locks for specific XID, or all locks if it's InvalidXid.
1067 */
1068static void
1070{
1071 RecoveryLockXidEntry *entry;
1072
1073 if (TransactionIdIsValid(xid))
1074 {
1075 if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1076 {
1079 }
1080 }
1081 else
1083}
1084
1085/*
1086 * Release locks for a transaction tree, starting at xid down, from
1087 * RecoveryLockXidHash.
1088 *
1089 * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1090 * to remove any AccessExclusiveLocks requested by a transaction.
1091 */
1092void
1094{
1095 int i;
1096
1098
1099 for (i = 0; i < nsubxids; i++)
1100 StandbyReleaseLocks(subxids[i]);
1101}
1102
1103/*
1104 * Called at end of recovery and when we see a shutdown checkpoint.
1105 */
1106void
1108{
1109 HASH_SEQ_STATUS status;
1110 RecoveryLockXidEntry *entry;
1111
1112 elog(DEBUG2, "release all standby locks");
1113
1115 while ((entry = hash_seq_search(&status)))
1116 {
1119 }
1120}
1121
1122/*
1123 * StandbyReleaseOldLocks
1124 * Release standby locks held by top-level XIDs that aren't running,
1125 * as long as they're not prepared transactions.
1126 *
1127 * This is needed to prune the locks of crashed transactions, which didn't
1128 * write an ABORT/COMMIT record.
1129 */
1130void
1132{
1133 HASH_SEQ_STATUS status;
1134 RecoveryLockXidEntry *entry;
1135
1137 while ((entry = hash_seq_search(&status)))
1138 {
1140
1141 /* Skip if prepared transaction. */
1143 continue;
1144
1145 /* Skip if >= oldxid. */
1146 if (!TransactionIdPrecedes(entry->xid, oldxid))
1147 continue;
1148
1149 /* Remove all locks and hash table entry. */
1152 }
1153}
1154
1155/*
1156 * --------------------------------------------------------------------
1157 * Recovery handling for Rmgr RM_STANDBY_ID
1158 *
1159 * These record types will only be created if XLogStandbyInfoActive()
1160 * --------------------------------------------------------------------
1161 */
1162
1163void
1165{
1166 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1167
1168 /* Backup blocks are not used in standby records */
1170
1171 /* Do nothing if we're not in hot standby mode */
1173 return;
1174
1175 if (info == XLOG_STANDBY_LOCK)
1176 {
1178 int i;
1179
1180 for (i = 0; i < xlrec->nlocks; i++)
1182 xlrec->locks[i].dbOid,
1183 xlrec->locks[i].relOid);
1184 }
1185 else if (info == XLOG_RUNNING_XACTS)
1186 {
1189
1190 running.xcnt = xlrec->xcnt;
1191 running.subxcnt = xlrec->subxcnt;
1192 running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
1193 running.nextXid = xlrec->nextXid;
1194 running.latestCompletedXid = xlrec->latestCompletedXid;
1195 running.oldestRunningXid = xlrec->oldestRunningXid;
1196 running.xids = xlrec->xids;
1197
1199
1200 /*
1201 * The startup process currently has no convenient way to schedule
1202 * stats to be reported. XLOG_RUNNING_XACTS records issued at a
1203 * regular cadence, making this a convenient location to report stats.
1204 * While these records aren't generated with wal_level=minimal, stats
1205 * also cannot be accessed during WAL replay.
1206 */
1207 pgstat_report_stat(true);
1208 }
1209 else if (info == XLOG_INVALIDATIONS)
1210 {
1212
1214 xlrec->nmsgs,
1215 xlrec->relcacheInitFileInval,
1216 xlrec->dbId,
1217 xlrec->tsId);
1218 }
1219 else
1220 elog(PANIC, "standby_redo: unknown op code %u", info);
1221}
1222
1223/*
1224 * Log details of the current snapshot to WAL. This allows the snapshot state
1225 * to be reconstructed on the standby and for logical decoding.
1226 *
1227 * This is used for Hot Standby as follows:
1228 *
1229 * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1230 * start from a shutdown checkpoint because we know nothing was running
1231 * at that time and our recovery snapshot is known empty. In the more
1232 * typical case of an online checkpoint we need to jump through a few
1233 * hoops to get a correct recovery snapshot and this requires a two or
1234 * sometimes a three stage process.
1235 *
1236 * The initial snapshot must contain all running xids and all current
1237 * AccessExclusiveLocks at a point in time on the standby. Assembling
1238 * that information while the server is running requires many and
1239 * various LWLocks, so we choose to derive that information piece by
1240 * piece and then re-assemble that info on the standby. When that
1241 * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1242 *
1243 * Since locking on the primary when we derive the information is not
1244 * strict, we note that there is a time window between the derivation and
1245 * writing to WAL of the derived information. That allows race conditions
1246 * that we must resolve, since xids and locks may enter or leave the
1247 * snapshot during that window. This creates the issue that an xid or
1248 * lock may start *after* the snapshot has been derived yet *before* the
1249 * snapshot is logged in the running xacts WAL record. We resolve this by
1250 * starting to accumulate changes at a point just prior to when we derive
1251 * the snapshot on the primary, then ignore duplicates when we later apply
1252 * the snapshot from the running xacts record. This is implemented during
1253 * CreateCheckPoint() where we use the logical checkpoint location as
1254 * our starting point and then write the running xacts record immediately
1255 * before writing the main checkpoint WAL record. Since we always start
1256 * up from a checkpoint and are immediately at our starting point, we
1257 * unconditionally move to STANDBY_INITIALIZED. After this point we
1258 * must do 4 things:
1259 * * move shared nextXid forwards as we see new xids
1260 * * extend the clog and subtrans with each new xid
1261 * * keep track of uncommitted known assigned xids
1262 * * keep track of uncommitted AccessExclusiveLocks
1263 *
1264 * When we see a commit/abort we must remove known assigned xids and locks
1265 * from the completing transaction. Attempted removals that cannot locate
1266 * an entry are expected and must not cause an error when we are in state
1267 * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1268 * KnownAssignedXidsRemove().
1269 *
1270 * Later, when we apply the running xact data we must be careful to ignore
1271 * transactions already committed, since those commits raced ahead when
1272 * making WAL entries.
1273 *
1274 * For logical decoding only the running xacts information is needed;
1275 * there's no need to look at the locking information, but it's logged anyway,
1276 * as there's no independent knob to just enable logical decoding. For
1277 * details of how this is used, check snapbuild.c's introductory comment.
1278 *
1279 *
1280 * Returns the RecPtr of the last inserted record.
1281 */
1284{
1286 RunningTransactions running;
1287 xl_standby_lock *locks;
1288 int nlocks;
1289 bool logical_decoding_enabled = IsLogicalDecodingEnabled();
1290
1292
1293#ifdef USE_INJECTION_POINTS
1294 if (IS_INJECTION_POINT_ATTACHED("skip-log-running-xacts"))
1295 {
1296 /*
1297 * This record could move slot's xmin forward during decoding, leading
1298 * to unpredictable results, so skip it when requested by the test.
1299 */
1300 return GetInsertRecPtr();
1301 }
1302#endif
1303
1304 /*
1305 * Get details of any AccessExclusiveLocks being held at the moment.
1306 */
1307 locks = GetRunningTransactionLocks(&nlocks);
1308 if (nlocks > 0)
1309 LogAccessExclusiveLocks(nlocks, locks);
1310 pfree(locks);
1311
1312 /*
1313 * Log details of all in-progress transactions. This should be the last
1314 * record we write, because standby will open up when it sees this.
1315 */
1316 running = GetRunningTransactionData();
1317
1318 /*
1319 * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
1320 * For Hot Standby this can be done before inserting the WAL record
1321 * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
1322 * the clog. For logical decoding, though, the lock can't be released
1323 * early because the clog might be "in the future" from the POV of the
1324 * historic snapshot. This would allow for situations where we're waiting
1325 * for the end of a transaction listed in the xl_running_xacts record
1326 * which, according to the WAL, has committed before the xl_running_xacts
1327 * record. Fortunately this routine isn't executed frequently, and it's
1328 * only a shared lock.
1329 */
1330 if (!logical_decoding_enabled)
1332
1333 recptr = LogCurrentRunningXacts(running);
1334
1335 /* Release lock if we kept it longer ... */
1336 if (logical_decoding_enabled)
1338
1339 /* GetRunningTransactionData() acquired XidGenLock, we must release it */
1341
1342 return recptr;
1343}
1344
1345/*
1346 * Record an enhanced snapshot of running transactions into WAL.
1347 *
1348 * The definitions of RunningTransactionsData and xl_running_xacts are
1349 * similar. We keep them separate because xl_running_xacts is a contiguous
1350 * chunk of memory and never exists fully until it is assembled in WAL.
1351 * The inserted records are marked as not being important for durability,
1352 * to avoid triggering superfluous checkpoint / archiving activity.
1353 */
1354static XLogRecPtr
1356{
1359
1360 xlrec.xcnt = CurrRunningXacts->xcnt;
1361 xlrec.subxcnt = CurrRunningXacts->subxcnt;
1362 xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
1363 xlrec.nextXid = CurrRunningXacts->nextXid;
1364 xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
1365 xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
1366
1367 /* Header */
1371
1372 /* array of TransactionIds */
1373 if (xlrec.xcnt > 0)
1375 (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
1376
1378
1379 if (xlrec.subxid_overflow)
1380 elog(DEBUG2,
1381 "snapshot of %d running transactions overflowed (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
1382 CurrRunningXacts->xcnt,
1384 CurrRunningXacts->oldestRunningXid,
1385 CurrRunningXacts->latestCompletedXid,
1386 CurrRunningXacts->nextXid);
1387 else
1388 elog(DEBUG2,
1389 "snapshot of %d+%d running transaction ids (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
1390 CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
1392 CurrRunningXacts->oldestRunningXid,
1393 CurrRunningXacts->latestCompletedXid,
1394 CurrRunningXacts->nextXid);
1395
1396 /*
1397 * Ensure running_xacts information is synced to disk not too far in the
1398 * future. We don't want to stall anything though (i.e. use XLogFlush()),
1399 * so we let the wal writer do it during normal operation.
1400 * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
1401 * and nudge the WALWriter into action if sleeping. Check
1402 * XLogBackgroundFlush() for details why a record might not be flushed
1403 * without it.
1404 */
1406
1407 return recptr;
1408}
1409
1410/*
1411 * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1412 * logged, as described in backend/storage/lmgr/README.
1413 */
1414static void
1428
1429/*
1430 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
1431 */
1432void
1434{
1436
1438
1439 xlrec.dbOid = dbOid;
1440 xlrec.relOid = relOid;
1441
1444}
1445
/*
 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Ensure that a TransactionId has been assigned to this transaction, for
	 * two reasons, both related to lock release on the standby. First, we
	 * must assign an xid so that RecordTransactionCommit() and
	 * RecordTransactionAbort() do not optimise away the transaction
	 * completion record which recovery relies upon to release locks. It's a
	 * hack, but for a corner case not worth adding code for into the main
	 * commit path. Second, we must assign an xid before the lock is recorded
	 * in shared memory, otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId which we later assert cannot happen.
	 */
	(void) GetCurrentTransactionId();
}
1466
1467/*
1468 * Emit WAL for invalidations. This currently is only used for commits without
1469 * an xid but which contain invalidations.
1470 */
1471void
1473 bool relcacheInitFileInval)
1474{
1476
1477 /* prepare record */
1478 memset(&xlrec, 0, sizeof(xlrec));
1479 xlrec.dbId = MyDatabaseId;
1481 xlrec.relcacheInitFileInval = relcacheInitFileInval;
1482 xlrec.nmsgs = nmsgs;
1483
1484 /* perform insertion */
1487 XLogRegisterData(msgs,
1488 nmsgs * sizeof(SharedInvalidationMessage));
1490}
1491
1492/* Return the description of recovery conflict */
1493static const char *
1495{
1496 const char *reasonDesc = _("unknown reason");
1497
1498 switch (reason)
1499 {
1501 reasonDesc = _("recovery conflict on buffer pin");
1502 break;
1504 reasonDesc = _("recovery conflict on lock");
1505 break;
1507 reasonDesc = _("recovery conflict on tablespace");
1508 break;
1510 reasonDesc = _("recovery conflict on snapshot");
1511 break;
1513 reasonDesc = _("recovery conflict on replication slot");
1514 break;
1516 reasonDesc = _("recovery conflict on deadlock");
1517 break;
1519 reasonDesc = _("recovery conflict on buffer deadlock");
1520 break;
1522 reasonDesc = _("recovery conflict on database");
1523 break;
1524 }
1525
1526 return reasonDesc;
1527}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1719
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1779
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1643
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1607
static int32 next
Definition blutils.c:225
static void cleanup(void)
Definition bootstrap.c:878
bool HoldingBufferPinThatDelaysRecovery(void)
Definition bufmgr.c:6676
uint8_t uint8
Definition c.h:577
#define Assert(condition)
Definition c.h:906
uint64_t uint64
Definition c.h:580
uint32_t uint32
Definition c.h:579
uint32 TransactionId
Definition c.h:699
#define OidIsValid(objectId)
Definition c.h:821
int64 TimestampTz
Definition timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
int errcode(int sqlerrcode)
Definition elog.c:874
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define PANIC
Definition elog.h:42
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
int int int int errdetail_log_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
#define DEBUG4
Definition elog.h:27
ProcNumber MyProcNumber
Definition globals.c:90
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
#define IS_INJECTION_POINT_ATTACHED(name)
void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid)
Definition inval.c:1135
int i
Definition isn.c:77
LockAcquireResult LockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait)
Definition lock.c:809
void VirtualXactLockTableInsert(VirtualTransactionId vxid)
Definition lock.c:4619
bool LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
Definition lock.c:2102
void VirtualXactLockTableCleanup(void)
Definition lock.c:4642
bool VirtualXactLock(VirtualTransactionId vxid, bool wait)
Definition lock.c:4742
VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
Definition lock.c:3069
xl_standby_lock * GetRunningTransactionLocks(int *nlocks)
Definition lock.c:4170
#define VirtualTransactionIdIsValid(vxid)
Definition lock.h:69
#define SET_LOCKTAG_RELATION(locktag, dboid, reloid)
Definition lock.h:183
#define AccessExclusiveLock
Definition lockdefs.h:43
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:205
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
void pfree(void *pointer)
Definition mcxt.c:1616
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static char * errmsg
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define ERRCODE_T_R_DEADLOCK_DETECTED
Definition pgbench.c:78
long pgstat_report_stat(bool force)
Definition pgstat.c:704
#define InvalidOid
unsigned int Oid
static int fb(int x)
bool SignalRecoveryConflictWithVirtualXID(VirtualTransactionId vxid, RecoveryConflictReason reason)
Definition procarray.c:3489
void ExpireAllKnownAssignedTransactionIds(void)
Definition procarray.c:4510
RunningTransactions GetRunningTransactionData(void)
Definition procarray.c:2641
void SignalRecoveryConflictWithDatabase(Oid databaseid, RecoveryConflictReason reason)
Definition procarray.c:3534
int CountDBBackends(Oid databaseid)
Definition procarray.c:3634
PGPROC * ProcNumberGetProc(ProcNumber procNumber)
Definition procarray.c:3103
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition procarray.c:1058
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition procarray.c:3381
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
bool update_process_title
Definition ps_status.c:31
void pg_usleep(long microsec)
Definition signal.c:53
void SharedInvalBackendInit(bool sendOnly)
Definition sinvaladt.c:272
LocalTransactionId GetNextLocalTransactionId(void)
Definition sinvaladt.c:701
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition slot.c:2206
@ RS_INVAL_HORIZON
Definition slot.h:64
PGPROC * MyProc
Definition proc.c:68
int DeadlockTimeout
Definition proc.c:59
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:1972
void standby_redo(XLogReaderState *record)
Definition standby.c:1164
void ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition standby.c:514
static bool WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
Definition standby.c:235
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, RecoveryConflictReason reason, uint32 wait_event_info, bool report_waiting)
Definition standby.c:361
static volatile sig_atomic_t got_standby_deadlock_timeout
Definition standby.c:70
static TimestampTz GetStandbyLimitTime(void)
Definition standby.c:202
void StandbyTimeoutHandler(void)
Definition standby.c:946
void ResolveRecoveryConflictWithBufferPin(void)
Definition standby.c:794
static volatile sig_atomic_t got_standby_delay_timeout
Definition standby.c:71
void StandbyLockTimeoutHandler(void)
Definition standby.c:955
static int standbyWait_us
Definition standby.c:227
static void StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
Definition standby.c:1036
void StandbyDeadLockHandler(void)
Definition standby.c:937
static HTAB * RecoveryLockXidHash
Definition standby.c:67
XLogRecPtr LogStandbySnapshot(void)
Definition standby.c:1283
void CheckRecoveryConflictDeadlock(void)
Definition standby.c:906
void InitRecoveryTransactionEnvironment(void)
Definition standby.c:96
void ResolveRecoveryConflictWithTablespace(Oid tsid)
Definition standby.c:541
bool log_recovery_conflict_waits
Definition standby.c:43
#define STANDBY_INITIAL_WAIT_US
Definition standby.c:226
static volatile sig_atomic_t got_standby_lock_timeout
Definition standby.c:72
void ResolveRecoveryConflictWithDatabase(Oid dbid)
Definition standby.c:571
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition standby.c:1093
static void SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason)
Definition standby.c:878
static const char * get_recovery_conflict_desc(RecoveryConflictReason reason)
Definition standby.c:1494
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition standby.c:1131
void ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition standby.c:470
void LogAccessExclusiveLockPrepare(void)
Definition standby.c:1450
static HTAB * RecoveryLockHash
Definition standby.c:66
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
Definition standby.c:1415
void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInitFileInval)
Definition standby.c:1472
void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
Definition standby.c:987
static void StandbyReleaseLocks(TransactionId xid)
Definition standby.c:1069
void LogAccessExclusiveLock(Oid dbOid, Oid relOid)
Definition standby.c:1433
int max_standby_archive_delay
Definition standby.c:41
void LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition standby.c:275
void StandbyReleaseAllLocks(void)
Definition standby.c:1107
int max_standby_streaming_delay
Definition standby.c:42
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
Definition standby.c:1355
void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
Definition standby.c:625
void ShutdownRecoveryTransactionEnvironment(void)
Definition standby.c:162
RecoveryConflictReason
Definition standby.h:29
@ RECOVERY_CONFLICT_TABLESPACE
Definition standby.h:34
@ RECOVERY_CONFLICT_SNAPSHOT
Definition standby.h:40
@ RECOVERY_CONFLICT_LOCK
Definition standby.h:37
@ RECOVERY_CONFLICT_DATABASE
Definition standby.h:31
@ RECOVERY_CONFLICT_STARTUP_DEADLOCK
Definition standby.h:53
@ RECOVERY_CONFLICT_BUFFERPIN
Definition standby.h:46
@ RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK
Definition standby.h:61
@ RECOVERY_CONFLICT_LOGICALSLOT
Definition standby.h:43
#define MinSizeOfXactRunningXacts
Definition standby.h:101
@ SUBXIDS_MISSING
Definition standby.h:119
@ SUBXIDS_IN_ARRAY
Definition standby.h:118
#define XLOG_INVALIDATIONS
Definition standbydefs.h:36
#define MinSizeOfInvalidations
Definition standbydefs.h:72
#define XLOG_STANDBY_LOCK
Definition standbydefs.h:34
#define XLOG_RUNNING_XACTS
Definition standbydefs.h:35
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
uint8 locktag_type
Definition lock.h:172
Definition proc.h:176
struct PGPROC::@133 vxid
pg_atomic_uint64 waitStart
Definition proc.h:303
ProcNumber procNumber
Definition proc.h:218
int pid
Definition proc.h:189
struct RecoveryLockEntry * next
Definition standby.c:57
xl_standby_lock key
Definition standby.c:56
TransactionId xid
Definition standby.c:62
struct RecoveryLockEntry * head
Definition standby.c:63
TransactionId oldestRunningXid
Definition standby.h:130
TransactionId nextXid
Definition standby.h:129
TransactionId latestCompletedXid
Definition standby.h:133
subxids_array_status subxid_status
Definition standby.h:128
TransactionId * xids
Definition standby.h:135
LocalTransactionId localTransactionId
Definition lock.h:64
ProcNumber procNumber
Definition lock.h:63
TransactionId xid
Definition lockdefs.h:53
void disable_all_timeouts(bool keep_indicators)
Definition timeout.c:751
void enable_timeouts(const EnableTimeoutParams *timeouts, int count)
Definition timeout.c:630
@ STANDBY_LOCK_TIMEOUT
Definition timeout.h:32
@ STANDBY_DEADLOCK_TIMEOUT
Definition timeout.h:30
@ STANDBY_TIMEOUT
Definition timeout.h:31
@ TMPARAM_AT
Definition timeout.h:54
@ TMPARAM_AFTER
Definition timeout.h:53
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188
#define InvalidTransactionId
Definition transam.h:31
#define U64FromFullTransactionId(x)
Definition transam.h:49
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
#define MaxTransactionId
Definition transam.h:35
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition twophase.c:1469
#define TimestampTzPlusMilliseconds(tz, ms)
Definition timestamp.h:85
FullTransactionId ReadNextFullTransactionId(void)
Definition varsup.c:288
#define PG_WAIT_LOCK
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
static volatile sig_atomic_t waiting
TransactionId GetCurrentTransactionId(void)
Definition xact.c:456
int MyXactFlags
Definition xact.c:138
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition xact.h:109
XLogRecPtr GetInsertRecPtr(void)
Definition xlog.c:6592
void XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
Definition xlog.c:2596
#define XLOG_MARK_UNIMPORTANT
Definition xlog.h:166
#define XLogStandbyInfoActive()
Definition xlog.h:125
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:416
void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
HotStandbyState standbyState
Definition xlogutils.c:53
bool InRecovery
Definition xlogutils.c:50
@ STANDBY_DISABLED
Definition xlogutils.h:52
@ STANDBY_INITIALIZED
Definition xlogutils.h:53
#define InHotStandby
Definition xlogutils.h:60