PostgreSQL Source Code  git master
standby.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * standby.c
4  * Misc functions used in Hot Standby mode.
5  *
6  * All functions for handling RM_STANDBY_ID, which relate to
7  * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8  * Plus conflict recovery processing.
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/storage/ipc/standby.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 #include "access/transam.h"
20 #include "access/twophase.h"
21 #include "access/xact.h"
22 #include "access/xloginsert.h"
23 #include "access/xlogrecovery.h"
24 #include "access/xlogutils.h"
25 #include "miscadmin.h"
26 #include "pgstat.h"
27 #include "replication/slot.h"
28 #include "storage/bufmgr.h"
29 #include "storage/proc.h"
30 #include "storage/procarray.h"
31 #include "storage/sinvaladt.h"
32 #include "storage/standby.h"
33 #include "utils/hsearch.h"
34 #include "utils/ps_status.h"
35 #include "utils/timeout.h"
36 #include "utils/timestamp.h"
37 
38 /* User-settable GUC parameters */
39 int max_standby_archive_delay = 30 * 1000;
42 
43 /*
44  * Keep track of all the exclusive locks owned by original transactions.
45  * For each known exclusive lock, there is a RecoveryLockEntry in the
46  * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
47  * given XID are chained together so that we can find them easily.
48  * For each original transaction that is known to have any such locks,
49  * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
50  * which stores the head of the chain of its locks.
51  */
52 typedef struct RecoveryLockEntry
53 {
54  xl_standby_lock key; /* hash key: xid, dbOid, relOid */
55  struct RecoveryLockEntry *next; /* chain link */
57 
58 typedef struct RecoveryLockXidEntry
59 {
60  TransactionId xid; /* hash key -- must be first */
61  struct RecoveryLockEntry *head; /* chain head */
63 
64 static HTAB *RecoveryLockHash = NULL;
65 static HTAB *RecoveryLockXidHash = NULL;
66 
67 /* Flags set by timeout handlers */
68 static volatile sig_atomic_t got_standby_deadlock_timeout = false;
69 static volatile sig_atomic_t got_standby_delay_timeout = false;
70 static volatile sig_atomic_t got_standby_lock_timeout = false;
71 
73  ProcSignalReason reason,
74  uint32 wait_event_info,
75  bool report_waiting);
78 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
79 static const char *get_recovery_conflict_desc(ProcSignalReason reason);
80 
81 /*
82  * InitRecoveryTransactionEnvironment
83  * Initialize tracking of our primary's in-progress transactions.
84  *
85  * We need to issue shared invalidations and hold locks. Holding locks
86  * means others may want to wait on us, so we need to make a lock table
87  * vxact entry like a real transaction. We could create and delete
88  * lock table entries for each transaction but it's simpler just to create
89  * one permanent entry and leave it there all the time. Locks are then
90  * acquired and released as needed. Yes, this means you can see the
91  * Startup process in pg_locks once we have run this.
92  */
93 void
95 {
97  HASHCTL hash_ctl;
98 
99  Assert(RecoveryLockHash == NULL); /* don't run this twice */
100 
101  /*
102  * Initialize the hash tables for tracking the locks held by each
103  * transaction.
104  */
105  hash_ctl.keysize = sizeof(xl_standby_lock);
106  hash_ctl.entrysize = sizeof(RecoveryLockEntry);
107  RecoveryLockHash = hash_create("RecoveryLockHash",
108  64,
109  &hash_ctl,
111  hash_ctl.keysize = sizeof(TransactionId);
112  hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
113  RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
114  64,
115  &hash_ctl,
117 
118  /*
119  * Initialize shared invalidation management for Startup process, being
120  * careful to register ourselves as a sendOnly process so we don't need to
121  * read messages, nor will we get signaled when the queue starts filling
122  * up.
123  */
125 
126  /*
127  * Lock a virtual transaction id for Startup process.
128  *
129  * We need to do GetNextLocalTransactionId() because
130  * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
131  * manager doesn't like that at all.
132  *
133  * Note that we don't need to run XactLockTableInsert() because nobody
134  * needs to wait on xids. That sounds a little strange, but table locks
135  * are held by vxids and row level locks are held by xids. All queries
136  * hold AccessShareLocks so never block while we write or lock new rows.
137  */
139  vxid.procNumber = MyProcNumber;
142 
144 }
145 
146 /*
147  * ShutdownRecoveryTransactionEnvironment
148  * Shut down transaction tracking
149  *
150  * Prepare to switch from hot standby mode to normal operation. Shut down
151  * recovery-time transaction tracking.
152  *
153  * This must be called even in shutdown of startup process if transaction
154  * tracking has been initialized. Otherwise some locks the tracked
155  * transactions were holding will not be released and may interfere with
156  * the processes still running (but will exit soon) at the exit of
157  * startup process.
158  */
159 void
161 {
162  /*
163  * Do nothing if RecoveryLockHash is NULL because that means that
164  * transaction tracking has not yet been initialized or has already been
165  * shut down. This makes it safe to have possibly-redundant calls of this
166  * function during process exit.
167  */
168  if (RecoveryLockHash == NULL)
169  return;
170 
171  /* Mark all tracked in-progress transactions as finished. */
173 
174  /* Release all locks the tracked transactions were holding */
176 
177  /* Destroy the lock hash tables. */
180  RecoveryLockHash = NULL;
181  RecoveryLockXidHash = NULL;
182 
183  /* Cleanup our VirtualTransaction */
185 }
186 
187 
188 /*
189  * -----------------------------------------------------
190  * Standby wait timers and backend cancel logic
191  * -----------------------------------------------------
192  */
193 
194 /*
195  * Determine the cutoff time at which we want to start canceling conflicting
196  * transactions. Returns zero (a time safely in the past) if we are willing
197  * to wait forever.
198  */
199 static TimestampTz
201 {
202  TimestampTz rtime;
203  bool fromStream;
204 
205  /*
206  * The cutoff time is the last WAL data receipt time plus the appropriate
207  * delay variable. Delay of -1 means wait forever.
208  */
209  GetXLogReceiptTime(&rtime, &fromStream);
210  if (fromStream)
211  {
213  return 0; /* wait forever */
215  }
216  else
217  {
219  return 0; /* wait forever */
221  }
222 }
223 
224 #define STANDBY_INITIAL_WAIT_US 1000
226 
227 /*
228  * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
229  * We wait here for a while then return. If we decide we can't wait any
230  * more then we return true, if we can wait some more return false.
231  */
232 static bool
234 {
235  TimestampTz ltime;
236 
238 
239  /* Are we past the limit time? */
240  ltime = GetStandbyLimitTime();
241  if (ltime && GetCurrentTimestamp() >= ltime)
242  return true;
243 
244  /*
245  * Sleep a bit (this is essential to avoid busy-waiting).
246  */
247  pgstat_report_wait_start(wait_event_info);
250 
251  /*
252  * Progressively increase the sleep times, but not to more than 1s, since
253  * pg_usleep isn't interruptible on some platforms.
254  */
255  standbyWait_us *= 2;
256  if (standbyWait_us > 1000000)
257  standbyWait_us = 1000000;
258 
259  return false;
260 }
261 
262 /*
263  * Log the recovery conflict.
264  *
265  * wait_start is the timestamp when the caller started to wait.
266  * now is the timestamp when this function has been called.
267  * wait_list is the list of virtual transaction ids assigned to
268  * conflicting processes. still_waiting indicates whether
269  * the startup process is still waiting for the recovery conflict
270  * to be resolved or not.
271  */
272 void
275  bool still_waiting)
276 {
277  long secs;
278  int usecs;
279  long msecs;
281  int nprocs = 0;
282 
283  /*
284  * There must be no conflicting processes when the recovery conflict has
285  * already been resolved.
286  */
287  Assert(still_waiting || wait_list == NULL);
288 
289  TimestampDifference(wait_start, now, &secs, &usecs);
290  msecs = secs * 1000 + usecs / 1000;
291  usecs = usecs % 1000;
292 
293  if (wait_list)
294  {
295  VirtualTransactionId *vxids;
296 
297  /* Construct a string listing the conflicting processes */
298  vxids = wait_list;
299  while (VirtualTransactionIdIsValid(*vxids))
300  {
301  PGPROC *proc = ProcNumberGetProc(vxids->procNumber);
302 
303  /* proc can be NULL if the target backend is not active */
304  if (proc)
305  {
306  if (nprocs == 0)
307  {
309  appendStringInfo(&buf, "%d", proc->pid);
310  }
311  else
312  appendStringInfo(&buf, ", %d", proc->pid);
313 
314  nprocs++;
315  }
316 
317  vxids++;
318  }
319  }
320 
321  /*
322  * If wait_list is specified, report the list of PIDs of active
323  * conflicting backends in a detail message. Note that if all the backends
324  * in the list are not active, no detail message is logged.
325  */
326  if (still_waiting)
327  {
328  ereport(LOG,
329  errmsg("recovery still waiting after %ld.%03d ms: %s",
330  msecs, usecs, get_recovery_conflict_desc(reason)),
331  nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
332  "Conflicting processes: %s.",
333  nprocs, buf.data) : 0);
334  }
335  else
336  {
337  ereport(LOG,
338  errmsg("recovery finished waiting after %ld.%03d ms: %s",
339  msecs, usecs, get_recovery_conflict_desc(reason)));
340  }
341 
342  if (nprocs > 0)
343  pfree(buf.data);
344 }
345 
346 /*
347  * This is the main executioner for any query backend that conflicts with
348  * recovery processing. Judgement has already been passed on it within
349  * a specific rmgr. Here we just issue the orders to the procs. The procs
350  * then throw the required error as instructed.
351  *
352  * If report_waiting is true, "waiting" is reported in PS display and the
353  * wait for recovery conflict is reported in the log, if necessary. If
354  * the caller is responsible for reporting them, report_waiting should be
355  * false. Otherwise, both the caller and this function report the same
356  * thing unexpectedly.
357  */
358 static void
360  ProcSignalReason reason, uint32 wait_event_info,
361  bool report_waiting)
362 {
363  TimestampTz waitStart = 0;
364  bool waiting = false;
365  bool logged_recovery_conflict = false;
366 
367  /* Fast exit, to avoid a kernel call if there's no work to be done. */
368  if (!VirtualTransactionIdIsValid(*waitlist))
369  return;
370 
371  /* Set the wait start timestamp for reporting */
372  if (report_waiting && (log_recovery_conflict_waits || update_process_title))
373  waitStart = GetCurrentTimestamp();
374 
375  while (VirtualTransactionIdIsValid(*waitlist))
376  {
377  /* reset standbyWait_us for each xact we wait for */
379 
380  /* wait until the virtual xid is gone */
381  while (!VirtualXactLock(*waitlist, false))
382  {
383  /* Is it time to kill it? */
384  if (WaitExceedsMaxStandbyDelay(wait_event_info))
385  {
386  pid_t pid;
387 
388  /*
389  * Now find out who to throw out of the balloon.
390  */
392  pid = CancelVirtualTransaction(*waitlist, reason);
393 
394  /*
395  * Wait a little bit for it to die so that we avoid flooding
396  * an unresponsive backend when system is heavily loaded.
397  */
398  if (pid != 0)
399  pg_usleep(5000L);
400  }
401 
402  if (waitStart != 0 && (!logged_recovery_conflict || !waiting))
403  {
404  TimestampTz now = 0;
405  bool maybe_log_conflict;
406  bool maybe_update_title;
407 
408  maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
409  maybe_update_title = (update_process_title && !waiting);
410 
411  /* Get the current timestamp if not reported yet */
412  if (maybe_log_conflict || maybe_update_title)
414 
415  /*
416  * Report via ps if we have been waiting for more than 500
417  * msec (should that be configurable?)
418  */
419  if (maybe_update_title &&
420  TimestampDifferenceExceeds(waitStart, now, 500))
421  {
422  set_ps_display_suffix("waiting");
423  waiting = true;
424  }
425 
426  /*
427  * Emit the log message if the startup process is waiting
428  * longer than deadlock_timeout for recovery conflict.
429  */
430  if (maybe_log_conflict &&
432  {
433  LogRecoveryConflict(reason, waitStart, now, waitlist, true);
434  logged_recovery_conflict = true;
435  }
436  }
437  }
438 
439  /* The virtual transaction is gone now, wait for the next one */
440  waitlist++;
441  }
442 
443  /*
444  * Emit the log message if recovery conflict was resolved but the startup
445  * process waited longer than deadlock_timeout for it.
446  */
447  if (logged_recovery_conflict)
448  LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
449  NULL, false);
450 
451  /* reset ps display to remove the suffix if we added one */
452  if (waiting)
454 
455 }
456 
457 /*
458  * Generate whatever recovery conflicts are needed to eliminate snapshots that
459  * might see XIDs <= snapshotConflictHorizon as still running.
460  *
461  * snapshotConflictHorizon cutoffs are our standard approach to generating
462  * granular recovery conflicts. Note that InvalidTransactionId values are
463  * interpreted as "definitely don't need any conflicts" here, which is a
464  * general convention that WAL records can (and often do) depend on.
465  */
466 void
468  bool isCatalogRel,
469  RelFileLocator locator)
470 {
471  VirtualTransactionId *backends;
472 
473  /*
474  * If we get passed InvalidTransactionId then we do nothing (no conflict).
475  *
476  * This can happen when replaying already-applied WAL records after a
477  * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
478  * record that marks as frozen a page which was already all-visible. It's
479  * also quite common with records generated during index deletion
480  * (original execution of the deletion can reason that a recovery conflict
481  * which is sufficient for the deletion operation must take place before
482  * replay of the deletion record itself).
483  */
484  if (!TransactionIdIsValid(snapshotConflictHorizon))
485  return;
486 
487  Assert(TransactionIdIsNormal(snapshotConflictHorizon));
488  backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
489  locator.dbOid);
492  WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
493  true);
494 
495  /*
496  * Note that WaitExceedsMaxStandbyDelay() is not taken into account here
497  * (as opposed to ResolveRecoveryConflictWithVirtualXIDs() above). That
498  * seems OK, given that this kind of conflict should not normally be
499  * reached, e.g. due to using a physical replication slot.
500  */
501  if (wal_level >= WAL_LEVEL_LOGICAL && isCatalogRel)
503  snapshotConflictHorizon);
504 }
505 
506 /*
507  * Variant of ResolveRecoveryConflictWithSnapshot that works with
508  * FullTransactionId values
509  */
510 void
512  bool isCatalogRel,
513  RelFileLocator locator)
514 {
515  /*
516  * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
517  * so truncate the logged FullTransactionId. If the logged value is very
518  * old, so that XID wrap-around already happened on it, there can't be any
519  * snapshots that still see it.
520  */
522  uint64 diff;
523 
524  diff = U64FromFullTransactionId(nextXid) -
525  U64FromFullTransactionId(snapshotConflictHorizon);
526  if (diff < MaxTransactionId / 2)
527  {
528  TransactionId truncated;
529 
530  truncated = XidFromFullTransactionId(snapshotConflictHorizon);
532  isCatalogRel,
533  locator);
534  }
535 }
536 
537 void
539 {
540  VirtualTransactionId *temp_file_users;
541 
542  /*
543  * Standby users may be currently using this tablespace for their
544  * temporary files. We only care about current users because
545  * temp_tablespace parameter will just ignore tablespaces that no longer
546  * exist.
547  *
548  * Ask everybody to cancel their queries immediately so we can ensure no
549  * temp files remain and we can remove the tablespace. Nuke the entire
550  * site from orbit, it's the only way to be sure.
551  *
552  * XXX: We could work out the pids of active backends using this
553  * tablespace by examining the temp filenames in the directory. We would
554  * then convert the pids into VirtualXIDs before attempting to cancel
555  * them.
556  *
557  * We don't wait for commit because drop tablespace is non-transactional.
558  */
560  InvalidOid);
563  WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
564  true);
565 }
566 
567 void
569 {
570  /*
571  * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
572  * only waits for transactions and completely idle sessions would block
573  * us. This is rare enough that we do this as simply as possible: no wait,
574  * just force them off immediately.
575  *
576  * No locking is required here because we already acquired
577  * AccessExclusiveLock. Anybody trying to connect while we do this will
578  * block during InitPostgres() and then disconnect when they see the
579  * database has been removed.
580  */
581  while (CountDBBackends(dbid) > 0)
582  {
584 
585  /*
586  * Wait awhile for them to die so that we avoid flooding an
587  * unresponsive backend when system is heavily loaded.
588  */
589  pg_usleep(10000);
590  }
591 }
592 
593 /*
594  * ResolveRecoveryConflictWithLock is called from ProcSleep()
595  * to resolve conflicts with other backends holding relation locks.
596  *
597  * The WaitLatch sleep normally done in ProcSleep()
598  * (when not InHotStandby) is performed here, for code clarity.
599  *
600  * We either resolve conflicts immediately or set a timeout to wake us at
601  * the limit of our patience.
602  *
603  * Resolve conflicts by sending cancel requests to all backends holding a conflicting
604  * lock. As we are already queued to be granted the lock, no new lock
605  * requests conflicting with ours will be granted in the meantime.
606  *
607  * We also must check for deadlocks involving the Startup process and
608  * hot-standby backend processes. If deadlock_timeout is reached in
609  * this function, all the backends holding the conflicting locks are
610  * requested to check themselves for deadlocks.
611  *
612  * logging_conflict should be true if the recovery conflict has not been
613  * logged yet even though logging is enabled. After deadlock_timeout is
614  * reached and the request for deadlock check is sent, we wait again to
615  * be signaled by the release of the lock if logging_conflict is false.
616  * Otherwise we return without waiting again so that the caller can report
617  * the recovery conflict. In this case, then, this function is called again
618  * with logging_conflict=false (because the recovery conflict has already
619  * been logged) and we will wait again for the lock to be released.
620  */
621 void
622 ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
623 {
624  TimestampTz ltime;
626 
628 
629  ltime = GetStandbyLimitTime();
631 
632  /*
633  * Update waitStart if first time through after the startup process
634  * started waiting for the lock. It should not be updated every time
635  * ResolveRecoveryConflictWithLock() is called during the wait.
636  *
637  * Use the current time obtained for comparison with ltime as waitStart
638  * (i.e., the time when this process started waiting for the lock). Since
639  * getting the current time newly can cause overhead, we reuse the
640  * already-obtained time to avoid that overhead.
641  *
642  * Note that waitStart is updated without holding the lock table's
643  * partition lock, to avoid the overhead by additional lock acquisition.
644  * This can cause "waitstart" in pg_locks to become NULL for a very short
645  * period of time after the wait started even though "granted" is false.
646  * This is OK in practice because we can assume that users are likely to
647  * look at "waitstart" when waiting for the lock for a long time.
648  */
649  if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
651 
652  if (now >= ltime && ltime != 0)
653  {
654  /*
655  * We're already behind, so clear a path as quickly as possible.
656  */
657  VirtualTransactionId *backends;
658 
659  backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
660 
661  /*
662  * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
663  * "waiting" in PS display by disabling its argument report_waiting
664  * because the caller, WaitOnLock(), has already reported that.
665  */
668  PG_WAIT_LOCK | locktag.locktag_type,
669  false);
670  }
671  else
672  {
673  /*
674  * Wait (or wait again) until ltime, and check for deadlocks as well
675  * if we will be waiting longer than deadlock_timeout
676  */
677  EnableTimeoutParams timeouts[2];
678  int cnt = 0;
679 
680  if (ltime != 0)
681  {
682  got_standby_lock_timeout = false;
683  timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
684  timeouts[cnt].type = TMPARAM_AT;
685  timeouts[cnt].fin_time = ltime;
686  cnt++;
687  }
688 
690  timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
691  timeouts[cnt].type = TMPARAM_AFTER;
692  timeouts[cnt].delay_ms = DeadlockTimeout;
693  cnt++;
694 
695  enable_timeouts(timeouts, cnt);
696  }
697 
698  /* Wait to be signaled by the release of the Relation Lock */
700 
701  /*
702  * Exit if ltime is reached. Then all the backends holding conflicting
703  * locks will be canceled in the next ResolveRecoveryConflictWithLock()
704  * call.
705  */
707  goto cleanup;
708 
710  {
711  VirtualTransactionId *backends;
712 
713  backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
714 
715  /* Quick exit if there's no work to be done */
716  if (!VirtualTransactionIdIsValid(*backends))
717  goto cleanup;
718 
719  /*
720  * Send signals to all the backends holding the conflicting locks, to
721  * ask them to check themselves for deadlocks.
722  */
723  while (VirtualTransactionIdIsValid(*backends))
724  {
725  SignalVirtualTransaction(*backends,
727  false);
728  backends++;
729  }
730 
731  /*
732  * Exit if the recovery conflict has not been logged yet even though
733  * logging is enabled, so that the caller can log that. Then
734  * ResolveRecoveryConflictWithLock() is called again and we will wait again
735  * for the lock to be released.
736  */
737  if (logging_conflict)
738  goto cleanup;
739 
740  /*
741  * Wait again here to be signaled by the release of the Relation Lock,
742  * to prevent the subsequent ResolveRecoveryConflictWithLock() from causing
743  * deadlock_timeout and sending a request for deadlocks check again.
744  * Otherwise the request continues to be sent every deadlock_timeout
745  * until the relation locks are released or ltime is reached.
746  */
749  }
750 
751 cleanup:
752 
753  /*
754  * Clear any timeout requests established above. We assume here that the
755  * Startup process doesn't have any other outstanding timeouts than those
756  * used by this function. If that stops being true, we could cancel the
757  * timeouts individually, but that'd be slower.
758  */
759  disable_all_timeouts(false);
760  got_standby_lock_timeout = false;
762 }
763 
764 /*
765  * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
766  * to resolve conflicts with other backends holding buffer pins.
767  *
768  * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
769  * (when not InHotStandby) is performed here, for code clarity.
770  *
771  * We either resolve conflicts immediately or set a timeout to wake us at
772  * the limit of our patience.
773  *
774  * Resolve conflicts by sending a PROCSIG signal to all backends to check if
775  * they hold one of the buffer pins that is blocking Startup process. If so,
776  * those backends will take an appropriate error action, ERROR or FATAL.
777  *
778  * We also must check for deadlocks. Deadlocks occur because if queries
779  * wait on a lock, that must be behind an AccessExclusiveLock, which can only
780  * be cleared if the Startup process replays a transaction completion record.
781  * If Startup process is also waiting then that is a deadlock. The deadlock
782  * can occur if the query is waiting and then the Startup sleeps, or if
783  * Startup is sleeping and the query waits on a lock. We protect against
784  * only the former sequence here, the latter sequence is checked prior to
785  * the query sleeping, in CheckRecoveryConflictDeadlock().
786  *
787  * Deadlocks are extremely rare, and relatively expensive to check for,
788  * so we don't do a deadlock check right away ... only if we have had to wait
789  * at least deadlock_timeout.
790  */
791 void
793 {
794  TimestampTz ltime;
795 
797 
798  ltime = GetStandbyLimitTime();
799 
800  if (GetCurrentTimestamp() >= ltime && ltime != 0)
801  {
802  /*
803  * We're already behind, so clear a path as quickly as possible.
804  */
806  }
807  else
808  {
809  /*
810  * Wake up at ltime, and check for deadlocks as well if we will be
811  * waiting longer than deadlock_timeout
812  */
813  EnableTimeoutParams timeouts[2];
814  int cnt = 0;
815 
816  if (ltime != 0)
817  {
818  timeouts[cnt].id = STANDBY_TIMEOUT;
819  timeouts[cnt].type = TMPARAM_AT;
820  timeouts[cnt].fin_time = ltime;
821  cnt++;
822  }
823 
825  timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
826  timeouts[cnt].type = TMPARAM_AFTER;
827  timeouts[cnt].delay_ms = DeadlockTimeout;
828  cnt++;
829 
830  enable_timeouts(timeouts, cnt);
831  }
832 
833  /*
834  * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
835  * by one of the timeouts established above.
836  *
837  * We assume that only UnpinBuffer() and the timeout requests established
838  * above can wake us up here. WakeupRecovery() called by walreceiver or
839  * SIGHUP signal handler, etc. cannot do that because it uses a different
840  * latch from the one that ProcWaitForSignal() waits on.
841  */
842  ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
843 
847  {
848  /*
849  * Send out a request for hot-standby backends to check themselves for
850  * deadlocks.
851  *
852  * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
853  * to be signaled by UnpinBuffer() again and send a request for
854  * deadlocks check if deadlock_timeout happens. This causes the
855  * request to continue to be sent every deadlock_timeout until the
856  * buffer is unpinned or ltime is reached. This would increase the
857  * workload in the startup process and backends. In practice it may
858  * not be so harmful because the period that the buffer is kept pinned
859  * is basically not so long. But we should fix this?
860  */
862  }
863 
864  /*
865  * Clear any timeout requests established above. We assume here that the
866  * Startup process doesn't have any other timeouts than what this function
867  * uses. If that stops being true, we could cancel the timeouts
868  * individually, but that'd be slower.
869  */
870  disable_all_timeouts(false);
873 }
874 
875 static void
877 {
880 
881  /*
882  * We send signal to all backends to ask them if they are holding the
883  * buffer pin which is delaying the Startup process. We must not set the
884  * conflict flag yet, since most backends will be innocent. Let the
885  * SIGUSR1 handling in each backend decide their own fate.
886  */
887  CancelDBBackends(InvalidOid, reason, false);
888 }
889 
890 /*
891  * In Hot Standby perform early deadlock detection. We abort the lock
892  * wait if we are about to sleep while holding the buffer pin that Startup
893  * process is waiting for.
894  *
895  * Note: this code is pessimistic, because there is no way for it to
896  * determine whether an actual deadlock condition is present: the lock we
897  * need to wait for might be unrelated to any held by the Startup process.
898  * Sooner or later, this mechanism should get ripped out in favor of somehow
899  * accounting for buffer locks in DeadLockCheck(). However, errors here
900  * seem to be very low-probability in practice, so for now it's not worth
901  * the trouble.
902  */
903 void
905 {
906  Assert(!InRecovery); /* do not call in Startup process */
907 
909  return;
910 
911  /*
912  * Error message should match ProcessInterrupts() but we avoid calling
913  * that because we aren't handling an interrupt at this point. Note that
914  * we only cancel the current transaction here, so if we are in a
915  * subtransaction and the pin is held by a parent, then the Startup
916  * process will continue to wait even though we have avoided deadlock.
917  */
918  ereport(ERROR,
920  errmsg("canceling statement due to conflict with recovery"),
921  errdetail("User transaction caused buffer deadlock with recovery.")));
922 }
923 
924 
925 /* --------------------------------
926  * timeout handler routines
927  * --------------------------------
928  */
929 
930 /*
931  * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
932  * exceeded.
933  */
934 void
936 {
938 }
939 
940 /*
941  * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
942  */
943 void
945 {
947 }
948 
949 /*
950  * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
951  */
952 void
954 {
956 }
957 
958 /*
959  * -----------------------------------------------------
960  * Locking in Recovery Mode
961  * -----------------------------------------------------
962  *
963  * All locks are held by the Startup process using a single virtual
964  * transaction. This implementation is both simpler and in some senses,
965  * more correct. The locks held mean "some original transaction held
966  * this lock, so query access is not allowed at this time". So the Startup
967  * process is the proxy by which the original locks are implemented.
968  *
969  * We only keep track of AccessExclusiveLocks, which are only ever held by
970  * one transaction on one relation.
971  *
972  * We keep a table of known locks in the RecoveryLockHash hash table.
973  * The point of that table is to let us efficiently de-duplicate locks,
974  * which is important because checkpoints will re-report the same locks
975  * already held. There is also a RecoveryLockXidHash table with one entry
976  * per xid, which allows us to efficiently find all the locks held by a
977  * given original transaction.
978  *
979  * We use session locks rather than normal locks so we don't need
980  * ResourceOwners.
981  */
982 
983 
984 void
986 {
987  RecoveryLockXidEntry *xidentry;
988  RecoveryLockEntry *lockentry;
990  LOCKTAG locktag;
991  bool found;
992 
993  /* Already processed? */
994  if (!TransactionIdIsValid(xid) ||
995  TransactionIdDidCommit(xid) ||
997  return;
998 
999  elog(DEBUG4, "adding recovery lock: db %u rel %u", dbOid, relOid);
1000 
1001  /* dbOid is InvalidOid when we are locking a shared relation. */
1002  Assert(OidIsValid(relOid));
1003 
1004  /* Create a hash entry for this xid, if we don't have one already. */
1005  xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
1006  if (!found)
1007  {
1008  Assert(xidentry->xid == xid); /* dynahash should have set this */
1009  xidentry->head = NULL;
1010  }
1011 
1012  /* Create a hash entry for this lock, unless we have one already. */
1013  key.xid = xid;
1014  key.dbOid = dbOid;
1015  key.relOid = relOid;
1016  lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1017  if (!found)
1018  {
1019  /* It's new, so link it into the XID's list ... */
1020  lockentry->next = xidentry->head;
1021  xidentry->head = lockentry;
1022 
1023  /* ... and acquire the lock locally. */
1024  SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1025 
1026  (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1027  }
1028 }
1029 
1030 /*
1031  * Release all the locks associated with this RecoveryLockXidEntry.
1032  */
1033 static void
1035 {
1036  RecoveryLockEntry *entry;
1038 
1039  for (entry = xidentry->head; entry != NULL; entry = next)
1040  {
1041  LOCKTAG locktag;
1042 
1043  elog(DEBUG4,
1044  "releasing recovery lock: xid %u db %u rel %u",
1045  entry->key.xid, entry->key.dbOid, entry->key.relOid);
1046  /* Release the lock ... */
1047  SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
1048  if (!LockRelease(&locktag, AccessExclusiveLock, true))
1049  {
1050  elog(LOG,
1051  "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1052  entry->key.xid, entry->key.dbOid, entry->key.relOid);
1053  Assert(false);
1054  }
1055  /* ... and remove the per-lock hash entry */
1056  next = entry->next;
1057  hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL);
1058  }
1059 
1060  xidentry->head = NULL; /* just for paranoia */
1061 }
1062 
1063 /*
1064  * Release locks for specific XID, or all locks if it's InvalidXid.
1065  */
1066 static void
1068 {
1069  RecoveryLockXidEntry *entry;
1070 
1071  if (TransactionIdIsValid(xid))
1072  {
1073  if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1074  {
1077  }
1078  }
1079  else
1081 }
1082 
1083 /*
1084  * Release locks for a transaction tree, starting at xid down, from
1085  * RecoveryLockXidHash.
1086  *
1087  * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1088  * to remove any AccessExclusiveLocks requested by a transaction.
1089  */
1090 void
1092 {
1093  int i;
1094 
1095  StandbyReleaseLocks(xid);
1096 
1097  for (i = 0; i < nsubxids; i++)
1098  StandbyReleaseLocks(subxids[i]);
1099 }
1100 
1101 /*
1102  * Called at end of recovery and when we see a shutdown checkpoint.
1103  */
1104 void
1106 {
1107  HASH_SEQ_STATUS status;
1108  RecoveryLockXidEntry *entry;
1109 
1110  elog(DEBUG2, "release all standby locks");
1111 
1113  while ((entry = hash_seq_search(&status)))
1114  {
1117  }
1118 }
1119 
1120 /*
1121  * StandbyReleaseOldLocks
1122  * Release standby locks held by top-level XIDs that aren't running,
1123  * as long as they're not prepared transactions.
1124  *
1125  * This is needed to prune the locks of crashed transactions, which didn't
1126  * write an ABORT/COMMIT record.
1127  */
1128 void
1130 {
1131  HASH_SEQ_STATUS status;
1132  RecoveryLockXidEntry *entry;
1133 
1135  while ((entry = hash_seq_search(&status)))
1136  {
1137  Assert(TransactionIdIsValid(entry->xid));
1138 
1139  /* Skip if prepared transaction. */
1140  if (StandbyTransactionIdIsPrepared(entry->xid))
1141  continue;
1142 
1143  /* Skip if >= oldxid. */
1144  if (!TransactionIdPrecedes(entry->xid, oldxid))
1145  continue;
1146 
1147  /* Remove all locks and hash table entry. */
1150  }
1151 }
1152 
1153 /*
1154  * --------------------------------------------------------------------
1155  * Recovery handling for Rmgr RM_STANDBY_ID
1156  *
1157  * These record types will only be created if XLogStandbyInfoActive()
1158  * --------------------------------------------------------------------
1159  */
1160 
1161 void
1163 {
1164  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1165 
1166  /* Backup blocks are not used in standby records */
1167  Assert(!XLogRecHasAnyBlockRefs(record));
1168 
1169  /* Do nothing if we're not in hot standby mode */
1171  return;
1172 
1173  if (info == XLOG_STANDBY_LOCK)
1174  {
1175  xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
1176  int i;
1177 
1178  for (i = 0; i < xlrec->nlocks; i++)
1180  xlrec->locks[i].dbOid,
1181  xlrec->locks[i].relOid);
1182  }
1183  else if (info == XLOG_RUNNING_XACTS)
1184  {
1185  xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
1186  RunningTransactionsData running;
1187 
1188  running.xcnt = xlrec->xcnt;
1189  running.subxcnt = xlrec->subxcnt;
1191  running.nextXid = xlrec->nextXid;
1192  running.latestCompletedXid = xlrec->latestCompletedXid;
1193  running.oldestRunningXid = xlrec->oldestRunningXid;
1194  running.xids = xlrec->xids;
1195 
1196  ProcArrayApplyRecoveryInfo(&running);
1197 
1198  /*
1199  * The startup process currently has no convenient way to schedule
1200  * stats to be reported. XLOG_RUNNING_XACTS records issued at a
1201  * regular cadence, making this a convenient location to report stats.
1202  * While these records aren't generated with wal_level=minimal, stats
1203  * also cannot be accessed during WAL replay.
1204  */
1205  pgstat_report_stat(true);
1206  }
1207  else if (info == XLOG_INVALIDATIONS)
1208  {
1209  xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);
1210 
1212  xlrec->nmsgs,
1213  xlrec->relcacheInitFileInval,
1214  xlrec->dbId,
1215  xlrec->tsId);
1216  }
1217  else
1218  elog(PANIC, "standby_redo: unknown op code %u", info);
1219 }
1220 
1221 /*
1222  * Log details of the current snapshot to WAL. This allows the snapshot state
1223  * to be reconstructed on the standby and for logical decoding.
1224  *
1225  * This is used for Hot Standby as follows:
1226  *
1227  * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1228  * start from a shutdown checkpoint because we know nothing was running
1229  * at that time and our recovery snapshot is known empty. In the more
1230  * typical case of an online checkpoint we need to jump through a few
1231  * hoops to get a correct recovery snapshot and this requires a two or
1232  * sometimes a three stage process.
1233  *
1234  * The initial snapshot must contain all running xids and all current
1235  * AccessExclusiveLocks at a point in time on the standby. Assembling
1236  * that information while the server is running requires many and
1237  * various LWLocks, so we choose to derive that information piece by
1238  * piece and then re-assemble that info on the standby. When that
1239  * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1240  *
1241  * Since locking on the primary when we derive the information is not
1242  * strict, we note that there is a time window between the derivation and
1243  * writing to WAL of the derived information. That allows race conditions
1244  * that we must resolve, since xids and locks may enter or leave the
1245  * snapshot during that window. This creates the issue that an xid or
1246  * lock may start *after* the snapshot has been derived yet *before* the
1247  * snapshot is logged in the running xacts WAL record. We resolve this by
1248  * starting to accumulate changes at a point just prior to when we derive
1249  * the snapshot on the primary, then ignore duplicates when we later apply
1250  * the snapshot from the running xacts record. This is implemented during
1251  * CreateCheckPoint() where we use the logical checkpoint location as
1252  * our starting point and then write the running xacts record immediately
1253  * before writing the main checkpoint WAL record. Since we always start
1254  * up from a checkpoint and are immediately at our starting point, we
1255  * unconditionally move to STANDBY_INITIALIZED. After this point we
1256  * must do 4 things:
1257  * * move shared nextXid forwards as we see new xids
1258  * * extend the clog and subtrans with each new xid
1259  * * keep track of uncommitted known assigned xids
1260  * * keep track of uncommitted AccessExclusiveLocks
1261  *
1262  * When we see a commit/abort we must remove known assigned xids and locks
1263  * from the completing transaction. Attempted removals that cannot locate
1264  * an entry are expected and must not cause an error when we are in state
1265  * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1266  * KnownAssignedXidsRemove().
1267  *
1268  * Later, when we apply the running xact data we must be careful to ignore
1269  * transactions already committed, since those commits raced ahead when
1270  * making WAL entries.
1271  *
1272  * For logical decoding only the running xacts information is needed;
1273  * there's no need to look at the locking information, but it's logged anyway,
1274  * as there's no independent knob to just enable logical decoding. For
1275  * details of how this is used, check snapbuild.c's introductory comment.
1276  *
1277  *
1278  * Returns the RecPtr of the last inserted record.
1279  */
1280 XLogRecPtr
1282 {
1283  XLogRecPtr recptr;
1284  RunningTransactions running;
1285  xl_standby_lock *locks;
1286  int nlocks;
1287 
1289 
1290  /*
1291  * Get details of any AccessExclusiveLocks being held at the moment.
1292  */
1293  locks = GetRunningTransactionLocks(&nlocks);
1294  if (nlocks > 0)
1295  LogAccessExclusiveLocks(nlocks, locks);
1296  pfree(locks);
1297 
1298  /*
1299  * Log details of all in-progress transactions. This should be the last
1300  * record we write, because standby will open up when it sees this.
1301  */
1302  running = GetRunningTransactionData();
1303 
1304  /*
1305  * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
1306  * For Hot Standby this can be done before inserting the WAL record
1307  * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
1308  * the clog. For logical decoding, though, the lock can't be released
1309  * early because the clog might be "in the future" from the POV of the
1310  * historic snapshot. This would allow for situations where we're waiting
1311  * for the end of a transaction listed in the xl_running_xacts record
1312  * which, according to the WAL, has committed before the xl_running_xacts
1313  * record. Fortunately this routine isn't executed frequently, and it's
1314  * only a shared lock.
1315  */
1317  LWLockRelease(ProcArrayLock);
1318 
1319  recptr = LogCurrentRunningXacts(running);
1320 
1321  /* Release lock if we kept it longer ... */
1323  LWLockRelease(ProcArrayLock);
1324 
1325  /* GetRunningTransactionData() acquired XidGenLock, we must release it */
1326  LWLockRelease(XidGenLock);
1327 
1328  return recptr;
1329 }
1330 
1331 /*
1332  * Record an enhanced snapshot of running transactions into WAL.
1333  *
1334  * The definitions of RunningTransactionsData and xl_running_xacts are
1335  * similar. We keep them separate because xl_running_xacts is a contiguous
1336  * chunk of memory and never exists fully until it is assembled in WAL.
1337  * The inserted records are marked as not being important for durability,
1338  * to avoid triggering superfluous checkpoint / archiving activity.
1339  */
1340 static XLogRecPtr
1342 {
1343  xl_running_xacts xlrec;
1344  XLogRecPtr recptr;
1345 
1346  xlrec.xcnt = CurrRunningXacts->xcnt;
1347  xlrec.subxcnt = CurrRunningXacts->subxcnt;
1348  xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
1349  xlrec.nextXid = CurrRunningXacts->nextXid;
1350  xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
1351  xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
1352 
1353  /* Header */
1354  XLogBeginInsert();
1356  XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
1357 
1358  /* array of TransactionIds */
1359  if (xlrec.xcnt > 0)
1360  XLogRegisterData((char *) CurrRunningXacts->xids,
1361  (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
1362 
1363  recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
1364 
1365  if (xlrec.subxid_overflow)
1366  elog(DEBUG2,
1367  "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1368  CurrRunningXacts->xcnt,
1369  LSN_FORMAT_ARGS(recptr),
1370  CurrRunningXacts->oldestRunningXid,
1371  CurrRunningXacts->latestCompletedXid,
1372  CurrRunningXacts->nextXid);
1373  else
1374  elog(DEBUG2,
1375  "snapshot of %d+%d running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1376  CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
1377  LSN_FORMAT_ARGS(recptr),
1378  CurrRunningXacts->oldestRunningXid,
1379  CurrRunningXacts->latestCompletedXid,
1380  CurrRunningXacts->nextXid);
1381 
1382  /*
1383  * Ensure running_xacts information is synced to disk not too far in the
1384  * future. We don't want to stall anything though (i.e. use XLogFlush()),
1385  * so we let the wal writer do it during normal operation.
1386  * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
1387  * and nudge the WALWriter into action if sleeping. Check
1388  * XLogBackgroundFlush() for details why a record might not be flushed
1389  * without it.
1390  */
1391  XLogSetAsyncXactLSN(recptr);
1392 
1393  return recptr;
1394 }
1395 
1396 /*
1397  * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1398  * logged, as described in backend/storage/lmgr/README.
1399  */
1400 static void
1402 {
1403  xl_standby_locks xlrec;
1404 
1405  xlrec.nlocks = nlocks;
1406 
1407  XLogBeginInsert();
1408  XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
1409  XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
1411 
1412  (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
1413 }
1414 
1415 /*
1416  * Individual logging of AccessExclusiveLocks for use during LockAcquire()
1417  */
1418 void
1420 {
1421  xl_standby_lock xlrec;
1422 
1423  xlrec.xid = GetCurrentTransactionId();
1424 
1425  xlrec.dbOid = dbOid;
1426  xlrec.relOid = relOid;
1427 
1428  LogAccessExclusiveLocks(1, &xlrec);
1430 }
1431 
/*
 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Ensure that a TransactionId has been assigned to this transaction, for
	 * two reasons, both related to lock release on the standby. First, we
	 * must assign an xid so that RecordTransactionCommit() and
	 * RecordTransactionAbort() do not optimise away the transaction
	 * completion record which recovery relies upon to release locks. It's a
	 * hack, but for a corner case not worth adding code for into the main
	 * commit path. Second, we must assign an xid before the lock is recorded
	 * in shared memory, otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId which we later assert cannot happen.
	 */
	(void) GetCurrentTransactionId();
}
1452 
1453 /*
1454  * Emit WAL for invalidations. This currently is only used for commits without
1455  * an xid but which contain invalidations.
1456  */
1457 void
1459  bool relcacheInitFileInval)
1460 {
1461  xl_invalidations xlrec;
1462 
1463  /* prepare record */
1464  memset(&xlrec, 0, sizeof(xlrec));
1465  xlrec.dbId = MyDatabaseId;
1466  xlrec.tsId = MyDatabaseTableSpace;
1467  xlrec.relcacheInitFileInval = relcacheInitFileInval;
1468  xlrec.nmsgs = nmsgs;
1469 
1470  /* perform insertion */
1471  XLogBeginInsert();
1472  XLogRegisterData((char *) (&xlrec), MinSizeOfInvalidations);
1473  XLogRegisterData((char *) msgs,
1474  nmsgs * sizeof(SharedInvalidationMessage));
1475  XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
1476 }
1477 
1478 /* Return the description of recovery conflict */
1479 static const char *
1481 {
1482  const char *reasonDesc = _("unknown reason");
1483 
1484  switch (reason)
1485  {
1487  reasonDesc = _("recovery conflict on buffer pin");
1488  break;
1490  reasonDesc = _("recovery conflict on lock");
1491  break;
1493  reasonDesc = _("recovery conflict on tablespace");
1494  break;
1496  reasonDesc = _("recovery conflict on snapshot");
1497  break;
1499  reasonDesc = _("recovery conflict on replication slot");
1500  break;
1502  reasonDesc = _("recovery conflict on buffer deadlock");
1503  break;
1505  reasonDesc = _("recovery conflict on database");
1506  break;
1507  default:
1508  break;
1509  }
1510 
1511  return reasonDesc;
1512 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1720
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
static int32 next
Definition: blutils.c:219
static void cleanup(void)
Definition: bootstrap.c:704
bool HoldingBufferPinThatDelaysRecovery(void)
Definition: bufmgr.c:5373
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
uint64_t uint64
Definition: c.h:486
uint32_t uint32
Definition: c.h:485
uint32 TransactionId
Definition: c.h:606
#define OidIsValid(objectId)
Definition: c.h:729
int64 TimestampTz
Definition: timestamp.h:39
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errdetail_log_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1272
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define DEBUG4
Definition: elog.h:27
ProcNumber MyProcNumber
Definition: globals.c:89
Oid MyDatabaseTableSpace
Definition: globals.c:95
Oid MyDatabaseId
Definition: globals.c:93
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid)
Definition: inval.c:1062
int i
Definition: isn.c:72
static volatile sig_atomic_t waiting
Definition: latch.c:162
LockAcquireResult LockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait)
Definition: lock.c:803
VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
Definition: lock.c:2976
void VirtualXactLockTableInsert(VirtualTransactionId vxid)
Definition: lock.c:4527
xl_standby_lock * GetRunningTransactionLocks(int *nlocks)
Definition: lock.c:4078
bool LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
Definition: lock.c:2011
void VirtualXactLockTableCleanup(void)
Definition: lock.c:4550
bool VirtualXactLock(VirtualTransactionId vxid, bool wait)
Definition: lock.c:4650
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:67
#define SET_LOCKTAG_RELATION(locktag, dboid, reloid)
Definition: lock.h:181
#define AccessExclusiveLock
Definition: lockdefs.h:43
struct xl_standby_lock xl_standby_lock
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
void pfree(void *pointer)
Definition: mcxt.c:1521
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static char * buf
Definition: pg_test_fsync.c:72
#define ERRCODE_T_R_DEADLOCK_DETECTED
Definition: pgbench.c:77
long pgstat_report_stat(bool force)
Definition: pgstat.c:671
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
PGPROC * ProcNumberGetProc(ProcNumber procNumber)
Definition: procarray.c:3142
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3501
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4502
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3421
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2693
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3664
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3495
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3603
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
ProcSignalReason
Definition: procsignal.h:31
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
@ PROCSIG_RECOVERY_CONFLICT_LOCK
Definition: procsignal.h:44
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
@ PROCSIG_RECOVERY_CONFLICT_DATABASE
Definition: procsignal.h:42
@ PROCSIG_RECOVERY_CONFLICT_SNAPSHOT
Definition: procsignal.h:45
@ PROCSIG_RECOVERY_CONFLICT_TABLESPACE
Definition: procsignal.h:43
@ PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK
Definition: procsignal.h:48
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:421
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:369
bool update_process_title
Definition: ps_status.c:29
void pg_usleep(long microsec)
Definition: signal.c:53
void SharedInvalBackendInit(bool sendOnly)
Definition: sinvaladt.c:271
LocalTransactionId GetNextLocalTransactionId(void)
Definition: sinvaladt.c:700
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1811
@ RS_INVAL_HORIZON
Definition: slot.h:56
PGPROC * MyProc
Definition: proc.c:66
int DeadlockTimeout
Definition: proc.c:57
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1891
void standby_redo(XLogReaderState *record)
Definition: standby.c:1162
void ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition: standby.c:511
static bool WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
Definition: standby.c:233
static volatile sig_atomic_t got_standby_deadlock_timeout
Definition: standby.c:68
static TimestampTz GetStandbyLimitTime(void)
Definition: standby.c:200
void StandbyTimeoutHandler(void)
Definition: standby.c:944
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:792
static volatile sig_atomic_t got_standby_delay_timeout
Definition: standby.c:69
void StandbyLockTimeoutHandler(void)
Definition: standby.c:953
static int standbyWait_us
Definition: standby.c:225
static void StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
Definition: standby.c:1034
void StandbyDeadLockHandler(void)
Definition: standby.c:935
static HTAB * RecoveryLockXidHash
Definition: standby.c:65
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1281
struct RecoveryLockEntry RecoveryLockEntry
void CheckRecoveryConflictDeadlock(void)
Definition: standby.c:904
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ResolveRecoveryConflictWithTablespace(Oid tsid)
Definition: standby.c:538
static const char * get_recovery_conflict_desc(ProcSignalReason reason)
Definition: standby.c:1480
bool log_recovery_conflict_waits
Definition: standby.c:41
#define STANDBY_INITIAL_WAIT_US
Definition: standby.c:224
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ProcSignalReason reason, uint32 wait_event_info, bool report_waiting)
Definition: standby.c:359
static volatile sig_atomic_t got_standby_lock_timeout
Definition: standby.c:70
void ResolveRecoveryConflictWithDatabase(Oid dbid)
Definition: standby.c:568
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: standby.c:1091
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1129
struct RecoveryLockXidEntry RecoveryLockXidEntry
void ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, bool isCatalogRel, RelFileLocator locator)
Definition: standby.c:467
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
Definition: standby.c:876
void LogAccessExclusiveLockPrepare(void)
Definition: standby.c:1436
static HTAB * RecoveryLockHash
Definition: standby.c:64
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
Definition: standby.c:1401
void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInitFileInval)
Definition: standby.c:1458
void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
Definition: standby.c:985
static void StandbyReleaseLocks(TransactionId xid)
Definition: standby.c:1067
void LogAccessExclusiveLock(Oid dbOid, Oid relOid)
Definition: standby.c:1419
int max_standby_archive_delay
Definition: standby.c:39
void StandbyReleaseAllLocks(void)
Definition: standby.c:1105
int max_standby_streaming_delay
Definition: standby.c:40
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
Definition: standby.c:1341
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:273
void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
Definition: standby.c:622
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
#define MinSizeOfXactRunningXacts
Definition: standby.h:63
@ SUBXIDS_MISSING
Definition: standby.h:81
@ SUBXIDS_IN_ARRAY
Definition: standby.h:80
#define XLOG_INVALIDATIONS
Definition: standbydefs.h:36
#define MinSizeOfInvalidations
Definition: standbydefs.h:72
#define XLOG_STANDBY_LOCK
Definition: standbydefs.h:34
#define XLOG_RUNNING_XACTS
Definition: standbydefs.h:35
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
TimeoutType type
Definition: timeout.h:61
TimestampTz fin_time
Definition: timeout.h:63
TimeoutId id
Definition: timeout.h:60
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
Definition: lock.h:165
uint8 locktag_type
Definition: lock.h:170
Definition: proc.h:162
struct PGPROC::@122 vxid
pg_atomic_uint64 waitStart
Definition: proc.h:237
ProcNumber procNumber
Definition: proc.h:195
int pid
Definition: proc.h:182
struct RecoveryLockEntry * next
Definition: standby.c:55
xl_standby_lock key
Definition: standby.c:54
TransactionId xid
Definition: standby.c:60
struct RecoveryLockEntry * head
Definition: standby.c:61
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
LocalTransactionId localTransactionId
Definition: lock.h:62
ProcNumber procNumber
Definition: lock.h:61
SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:69
bool relcacheInitFileInval
Definition: standbydefs.h:67
TransactionId latestCompletedXid
Definition: standbydefs.h:54
TransactionId oldestRunningXid
Definition: standbydefs.h:53
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
TransactionId nextXid
Definition: standbydefs.h:52
TransactionId xid
Definition: lockdefs.h:53
xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:41
void disable_all_timeouts(bool keep_indicators)
Definition: timeout.c:751
void enable_timeouts(const EnableTimeoutParams *timeouts, int count)
Definition: timeout.c:630
@ STANDBY_LOCK_TIMEOUT
Definition: timeout.h:32
@ STANDBY_DEADLOCK_TIMEOUT
Definition: timeout.h:30
@ STANDBY_TIMEOUT
Definition: timeout.h:31
@ TMPARAM_AT
Definition: timeout.h:54
@ TMPARAM_AFTER
Definition: timeout.h:53
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188
#define InvalidTransactionId
Definition: transam.h:31
#define U64FromFullTransactionId(x)
Definition: transam.h:49
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define MaxTransactionId
Definition: transam.h:35
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1475
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:85
FullTransactionId ReadNextFullTransactionId(void)
Definition: varsup.c:288
#define PG_WAIT_LOCK
Definition: wait_event.h:19
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:453
int MyXactFlags
Definition: xact.c:135
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition: xact.h:108
int wal_level
Definition: xlog.c:131
void XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
Definition: xlog.c:2631
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60