PostgreSQL Source Code  git master
standby.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * standby.c
4  * Misc functions used in Hot Standby mode.
5  *
6  * All functions for handling RM_STANDBY_ID, which relate to
7  * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8  * Plus conflict recovery processing.
9  *
10  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/storage/ipc/standby.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 #include "access/transam.h"
20 #include "access/twophase.h"
21 #include "access/xact.h"
22 #include "access/xloginsert.h"
23 #include "access/xlogrecovery.h"
24 #include "access/xlogutils.h"
25 #include "miscadmin.h"
26 #include "pgstat.h"
27 #include "storage/bufmgr.h"
28 #include "storage/lmgr.h"
29 #include "storage/proc.h"
30 #include "storage/procarray.h"
31 #include "storage/sinvaladt.h"
32 #include "storage/standby.h"
33 #include "utils/hsearch.h"
34 #include "utils/memutils.h"
35 #include "utils/ps_status.h"
36 #include "utils/timeout.h"
37 #include "utils/timestamp.h"
38 
39 /* User-settable GUC parameters */
41 int max_standby_archive_delay = 30 * 1000;
44 
45 /*
46  * Keep track of all the exclusive locks owned by original transactions.
47  * For each known exclusive lock, there is a RecoveryLockEntry in the
48  * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
49  * given XID are chained together so that we can find them easily.
50  * For each original transaction that is known to have any such locks,
51  * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
52  * which stores the head of the chain of its locks.
53  */
54 typedef struct RecoveryLockEntry
55 {
56  xl_standby_lock key; /* hash key: xid, dbOid, relOid */
57  struct RecoveryLockEntry *next; /* chain link */
59 
60 typedef struct RecoveryLockXidEntry
61 {
62  TransactionId xid; /* hash key -- must be first */
63  struct RecoveryLockEntry *head; /* chain head */
65 
66 static HTAB *RecoveryLockHash = NULL;
67 static HTAB *RecoveryLockXidHash = NULL;
68 
69 /* Flags set by timeout handlers */
70 static volatile sig_atomic_t got_standby_deadlock_timeout = false;
71 static volatile sig_atomic_t got_standby_delay_timeout = false;
72 static volatile sig_atomic_t got_standby_lock_timeout = false;
73 
75  ProcSignalReason reason,
76  uint32 wait_event_info,
77  bool report_waiting);
80 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
81 static const char *get_recovery_conflict_desc(ProcSignalReason reason);
82 
83 /*
84  * InitRecoveryTransactionEnvironment
85  * Initialize tracking of our primary's in-progress transactions.
86  *
87  * We need to issue shared invalidations and hold locks. Holding locks
88  * means others may want to wait on us, so we need to make a lock table
89  * vxact entry like a real transaction. We could create and delete
90  * lock table entries for each transaction but it's simpler just to create
91  * one permanent entry and leave it there all the time. Locks are then
92  * acquired and released as needed. Yes, this means you can see the
93  * Startup process in pg_locks once we have run this.
94  */
95 void
97 {
99  HASHCTL hash_ctl;
100 
101  Assert(RecoveryLockHash == NULL); /* don't run this twice */
102 
103  /*
104  * Initialize the hash tables for tracking the locks held by each
105  * transaction.
106  */
107  hash_ctl.keysize = sizeof(xl_standby_lock);
108  hash_ctl.entrysize = sizeof(RecoveryLockEntry);
109  RecoveryLockHash = hash_create("RecoveryLockHash",
110  64,
111  &hash_ctl,
113  hash_ctl.keysize = sizeof(TransactionId);
114  hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
115  RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
116  64,
117  &hash_ctl,
119 
120  /*
121  * Initialize shared invalidation management for Startup process, being
122  * careful to register ourselves as a sendOnly process so we don't need to
123  * read messages, nor will we get signaled when the queue starts filling
124  * up.
125  */
127 
128  /*
129  * Lock a virtual transaction id for Startup process.
130  *
131  * We need to do GetNextLocalTransactionId() because
132  * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
133  * manager doesn't like that at all.
134  *
135  * Note that we don't need to run XactLockTableInsert() because nobody
136  * needs to wait on xids. That sounds a little strange, but table locks
137  * are held by vxids and row level locks are held by xids. All queries
138  * hold AccessShareLocks so never block while we write or lock new rows.
139  */
140  vxid.backendId = MyBackendId;
143 
145 }
146 
147 /*
148  * ShutdownRecoveryTransactionEnvironment
149  * Shut down transaction tracking
150  *
151  * Prepare to switch from hot standby mode to normal operation. Shut down
152  * recovery-time transaction tracking.
153  *
154  * This must be called even in shutdown of startup process if transaction
155  * tracking has been initialized. Otherwise some locks the tracked
156  * transactions were holding will not be released and may interfere with
157  * the processes still running (but which will exit soon) at the exit of
158  * startup process.
159  */
160 void
162 {
163  /*
164  * Do nothing if RecoveryLockHash is NULL because that means that
165  * transaction tracking has not yet been initialized or has already been
166  * shut down. This makes it safe to have possibly-redundant calls of this
167  * function during process exit.
168  */
169  if (RecoveryLockHash == NULL)
170  return;
171 
172  /* Mark all tracked in-progress transactions as finished. */
174 
175  /* Release all locks the tracked transactions were holding */
177 
178  /* Destroy the lock hash tables. */
181  RecoveryLockHash = NULL;
182  RecoveryLockXidHash = NULL;
183 
184  /* Cleanup our VirtualTransaction */
186 }
187 
188 
189 /*
190  * -----------------------------------------------------
191  * Standby wait timers and backend cancel logic
192  * -----------------------------------------------------
193  */
194 
195 /*
196  * Determine the cutoff time at which we want to start canceling conflicting
197  * transactions. Returns zero (a time safely in the past) if we are willing
198  * to wait forever.
199  */
200 static TimestampTz
202 {
203  TimestampTz rtime;
204  bool fromStream;
205 
206  /*
207  * The cutoff time is the last WAL data receipt time plus the appropriate
208  * delay variable. Delay of -1 means wait forever.
209  */
210  GetXLogReceiptTime(&rtime, &fromStream);
211  if (fromStream)
212  {
214  return 0; /* wait forever */
216  }
217  else
218  {
220  return 0; /* wait forever */
222  }
223 }
224 
225 #define STANDBY_INITIAL_WAIT_US 1000
227 
228 /*
229  * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
230  * We wait here for a while then return. If we decide we can't wait any
231  * more, we return true; if we can wait some more, we return false.
232  */
233 static bool
235 {
236  TimestampTz ltime;
237 
239 
240  /* Are we past the limit time? */
241  ltime = GetStandbyLimitTime();
242  if (ltime && GetCurrentTimestamp() >= ltime)
243  return true;
244 
245  /*
246  * Sleep a bit (this is essential to avoid busy-waiting).
247  */
248  pgstat_report_wait_start(wait_event_info);
251 
252  /*
253  * Progressively increase the sleep times, but not to more than 1s, since
254  * pg_usleep isn't interruptible on some platforms.
255  */
256  standbyWait_us *= 2;
257  if (standbyWait_us > 1000000)
258  standbyWait_us = 1000000;
259 
260  return false;
261 }
262 
263 /*
264  * Log the recovery conflict.
265  *
266  * wait_start is the timestamp when the caller started to wait.
267  * now is the timestamp when this function has been called.
268  * wait_list is the list of virtual transaction ids assigned to
269  * conflicting processes. still_waiting indicates whether
270  * the startup process is still waiting for the recovery conflict
271  * to be resolved or not.
272  */
273 void
276  bool still_waiting)
277 {
278  long secs;
279  int usecs;
280  long msecs;
282  int nprocs = 0;
283 
284  /*
285  * There must be no conflicting processes when the recovery conflict has
286  * already been resolved.
287  */
288  Assert(still_waiting || wait_list == NULL);
289 
290  TimestampDifference(wait_start, now, &secs, &usecs);
291  msecs = secs * 1000 + usecs / 1000;
292  usecs = usecs % 1000;
293 
294  if (wait_list)
295  {
296  VirtualTransactionId *vxids;
297 
298  /* Construct a string listing the conflicting processes */
299  vxids = wait_list;
300  while (VirtualTransactionIdIsValid(*vxids))
301  {
302  PGPROC *proc = BackendIdGetProc(vxids->backendId);
303 
304  /* proc can be NULL if the target backend is not active */
305  if (proc)
306  {
307  if (nprocs == 0)
308  {
310  appendStringInfo(&buf, "%d", proc->pid);
311  }
312  else
313  appendStringInfo(&buf, ", %d", proc->pid);
314 
315  nprocs++;
316  }
317 
318  vxids++;
319  }
320  }
321 
322  /*
323  * If wait_list is specified, report the list of PIDs of active
324  * conflicting backends in a detail message. Note that if all the backends
325  * in the list are not active, no detail message is logged.
326  */
327  if (still_waiting)
328  {
329  ereport(LOG,
330  errmsg("recovery still waiting after %ld.%03d ms: %s",
331  msecs, usecs, get_recovery_conflict_desc(reason)),
332  nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
333  "Conflicting processes: %s.",
334  nprocs, buf.data) : 0);
335  }
336  else
337  {
338  ereport(LOG,
339  errmsg("recovery finished waiting after %ld.%03d ms: %s",
340  msecs, usecs, get_recovery_conflict_desc(reason)));
341  }
342 
343  if (nprocs > 0)
344  pfree(buf.data);
345 }
346 
347 /*
348  * This is the main executioner for any query backend that conflicts with
349  * recovery processing. Judgement has already been passed on it within
350  * a specific rmgr. Here we just issue the orders to the procs. The procs
351  * then throw the required error as instructed.
352  *
353  * If report_waiting is true, "waiting" is reported in PS display and the
354  * wait for recovery conflict is reported in the log, if necessary. If
355  * the caller is responsible for reporting them, report_waiting should be
356  * false. Otherwise, both the caller and this function report the same
357  * thing unexpectedly.
358  */
359 static void
361  ProcSignalReason reason, uint32 wait_event_info,
362  bool report_waiting)
363 {
364  TimestampTz waitStart = 0;
365  char *new_status = NULL;
366  bool logged_recovery_conflict = false;
367 
368  /* Fast exit, to avoid a kernel call if there's no work to be done. */
369  if (!VirtualTransactionIdIsValid(*waitlist))
370  return;
371 
372  /* Set the wait start timestamp for reporting */
373  if (report_waiting && (log_recovery_conflict_waits || update_process_title))
374  waitStart = GetCurrentTimestamp();
375 
376  while (VirtualTransactionIdIsValid(*waitlist))
377  {
378  /* reset standbyWait_us for each xact we wait for */
380 
381  /* wait until the virtual xid is gone */
382  while (!VirtualXactLock(*waitlist, false))
383  {
384  /* Is it time to kill it? */
385  if (WaitExceedsMaxStandbyDelay(wait_event_info))
386  {
387  pid_t pid;
388 
389  /*
390  * Now find out who to throw out of the balloon.
391  */
393  pid = CancelVirtualTransaction(*waitlist, reason);
394 
395  /*
396  * Wait a little bit for it to die so that we avoid flooding
397  * an unresponsive backend when system is heavily loaded.
398  */
399  if (pid != 0)
400  pg_usleep(5000L);
401  }
402 
403  if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
404  {
405  TimestampTz now = 0;
406  bool maybe_log_conflict;
407  bool maybe_update_title;
408 
409  maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
410  maybe_update_title = (update_process_title && new_status == NULL);
411 
412  /* Get the current timestamp if not reported yet */
413  if (maybe_log_conflict || maybe_update_title)
415 
416  /*
417  * Report via ps if we have been waiting for more than 500
418  * msec (should that be configurable?)
419  */
420  if (maybe_update_title &&
421  TimestampDifferenceExceeds(waitStart, now, 500))
422  {
423  const char *old_status;
424  int len;
425 
426  old_status = get_ps_display(&len);
427  new_status = (char *) palloc(len + 8 + 1);
428  memcpy(new_status, old_status, len);
429  strcpy(new_status + len, " waiting");
430  set_ps_display(new_status);
431  new_status[len] = '\0'; /* truncate off " waiting" */
432  }
433 
434  /*
435  * Emit the log message if the startup process is waiting
436  * longer than deadlock_timeout for recovery conflict.
437  */
438  if (maybe_log_conflict &&
440  {
441  LogRecoveryConflict(reason, waitStart, now, waitlist, true);
442  logged_recovery_conflict = true;
443  }
444  }
445  }
446 
447  /* The virtual transaction is gone now, wait for the next one */
448  waitlist++;
449  }
450 
451  /*
452  * Emit the log message if recovery conflict was resolved but the startup
453  * process waited longer than deadlock_timeout for it.
454  */
455  if (logged_recovery_conflict)
456  LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
457  NULL, false);
458 
459  /* Reset ps display if we changed it */
460  if (new_status)
461  {
462  set_ps_display(new_status);
463  pfree(new_status);
464  }
465 }
466 
467 /*
468  * Generate whatever recovery conflicts are needed to eliminate snapshots that
469  * might see XIDs <= snapshotConflictHorizon as still running.
470  *
471  * snapshotConflictHorizon cutoffs are our standard approach to generating
472  * granular recovery conflicts. Note that InvalidTransactionId values are
473  * interpreted as "definitely don't need any conflicts" here, which is a
474  * general convention that WAL records can (and often do) depend on.
475  */
476 void
478  RelFileLocator locator)
479 {
480  VirtualTransactionId *backends;
481 
482  /*
483  * If we get passed InvalidTransactionId then we do nothing (no conflict).
484  *
485  * This can happen when replaying already-applied WAL records after a
486  * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
487  * record that marks as frozen a page which was already all-visible. It's
488  * also quite common with records generated during index deletion
489  * (original execution of the deletion can reason that a recovery conflict
490  * which is sufficient for the deletion operation must take place before
491  * replay of the deletion record itself).
492  */
493  if (!TransactionIdIsValid(snapshotConflictHorizon))
494  return;
495 
496  backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
497  locator.dbOid);
501  true);
502 }
503 
504 /*
505  * Variant of ResolveRecoveryConflictWithSnapshot that works with
506  * FullTransactionId values
507  */
508 void
510  RelFileLocator locator)
511 {
512  /*
513  * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
514  * so truncate the logged FullTransactionId. If the logged value is very
515  * old, so that XID wrap-around already happened on it, there can't be any
516  * snapshots that still see it.
517  */
519  uint64 diff;
520 
521  diff = U64FromFullTransactionId(nextXid) -
522  U64FromFullTransactionId(snapshotConflictHorizon);
523  if (diff < MaxTransactionId / 2)
524  {
525  TransactionId truncated;
526 
527  truncated = XidFromFullTransactionId(snapshotConflictHorizon);
528  ResolveRecoveryConflictWithSnapshot(truncated, locator);
529  }
530 }
531 
532 void
534 {
535  VirtualTransactionId *temp_file_users;
536 
537  /*
538  * Standby users may be currently using this tablespace for their
539  * temporary files. We only care about current users because
540  * temp_tablespace parameter will just ignore tablespaces that no longer
541  * exist.
542  *
543  * Ask everybody to cancel their queries immediately so we can ensure no
544  * temp files remain and we can remove the tablespace. Nuke the entire
545  * site from orbit, it's the only way to be sure.
546  *
547  * XXX: We could work out the pids of active backends using this
548  * tablespace by examining the temp filenames in the directory. We would
549  * then convert the pids into VirtualXIDs before attempting to cancel
550  * them.
551  *
552  * We don't wait for commit because drop tablespace is non-transactional.
553  */
555  InvalidOid);
559  true);
560 }
561 
562 void
564 {
565  /*
566  * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
567  * only waits for transactions and completely idle sessions would block
568  * us. This is rare enough that we do this as simply as possible: no wait,
569  * just force them off immediately.
570  *
571  * No locking is required here because we already acquired
572  * AccessExclusiveLock. Anybody trying to connect while we do this will
573  * block during InitPostgres() and then disconnect when they see the
574  * database has been removed.
575  */
576  while (CountDBBackends(dbid) > 0)
577  {
579 
580  /*
581  * Wait awhile for them to die so that we avoid flooding an
582  * unresponsive backend when system is heavily loaded.
583  */
584  pg_usleep(10000);
585  }
586 }
587 
588 /*
589  * ResolveRecoveryConflictWithLock is called from ProcSleep()
590  * to resolve conflicts with other backends holding relation locks.
591  *
592  * The WaitLatch sleep normally done in ProcSleep()
593  * (when not InHotStandby) is performed here, for code clarity.
594  *
595  * We either resolve conflicts immediately or set a timeout to wake us at
596  * the limit of our patience.
597  *
598  * Resolve conflicts by canceling all backends holding a conflicting
599  * lock. As we are already queued to be granted the lock, no new lock
600  * requests conflicting with ours will be granted in the meantime.
601  *
602  * We also must check for deadlocks involving the Startup process and
603  * hot-standby backend processes. If deadlock_timeout is reached in
604  * this function, all the backends holding the conflicting locks are
605  * requested to check themselves for deadlocks.
606  *
607  * logging_conflict should be true if the recovery conflict has not been
608  * logged yet even though logging is enabled. After deadlock_timeout is
609  * reached and the request for deadlock check is sent, we wait again to
610  * be signaled by the release of the lock if logging_conflict is false.
611  * Otherwise we return without waiting again so that the caller can report
612  * the recovery conflict. In this case, then, this function is called again
613  * with logging_conflict=false (because the recovery conflict has already
614  * been logged) and we will wait again for the lock to be released.
615  */
616 void
617 ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
618 {
619  TimestampTz ltime;
621 
623 
624  ltime = GetStandbyLimitTime();
626 
627  /*
628  * Update waitStart if first time through after the startup process
629  * started waiting for the lock. It should not be updated every time
630  * ResolveRecoveryConflictWithLock() is called during the wait.
631  *
632  * Use the current time obtained for comparison with ltime as waitStart
633  * (i.e., the time when this process started waiting for the lock). Since
634  * fetching the current time again would add overhead, we reuse the
635  * already-obtained time instead.
636  *
637  * Note that waitStart is updated without holding the lock table's
638  * partition lock, to avoid the overhead of an additional lock acquisition.
639  * This can cause "waitstart" in pg_locks to become NULL for a very short
640  * period of time after the wait started even though "granted" is false.
641  * This is OK in practice because we can assume that users are likely to
642  * look at "waitstart" when waiting for the lock for a long time.
643  */
644  if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
646 
647  if (now >= ltime && ltime != 0)
648  {
649  /*
650  * We're already behind, so clear a path as quickly as possible.
     *
     * ltime == 0 means there is no limit (wait forever), in which case
     * this branch is never taken.
651  */
652  VirtualTransactionId *backends;
653 
654  backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
655 
656  /*
657  * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
658  * "waiting" in PS display by disabling its argument report_waiting
659  * because the caller, WaitOnLock(), has already reported that.
660  */
663  PG_WAIT_LOCK | locktag.locktag_type,
664  false);
665  }
666  else
667  {
668  /*
669  * Wait (or wait again) until ltime, and check for deadlocks as well
670  * if we will be waiting longer than deadlock_timeout.
     *
     * Up to two timeouts are armed: STANDBY_LOCK_TIMEOUT at ltime
     * (TMPARAM_AT; only when ltime != 0) and STANDBY_DEADLOCK_TIMEOUT
     * after DeadlockTimeout (TMPARAM_AFTER, delay_ms).
671  */
672  EnableTimeoutParams timeouts[2];
673  int cnt = 0;
674 
675  if (ltime != 0)
676  {
677  got_standby_lock_timeout = false;
678  timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
679  timeouts[cnt].type = TMPARAM_AT;
680  timeouts[cnt].fin_time = ltime;
681  cnt++;
682  }
683 
685  timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
686  timeouts[cnt].type = TMPARAM_AFTER;
687  timeouts[cnt].delay_ms = DeadlockTimeout;
688  cnt++;
689 
690  enable_timeouts(timeouts, cnt);
691  }
692 
693  /* Wait to be signaled by the release of the Relation Lock */
695 
696  /*
697  * Exit if ltime is reached. Then all the backends holding conflicting
698  * locks will be canceled in the next ResolveRecoveryConflictWithLock()
699  * call.
700  */
702  goto cleanup;
703 
705  {
706  VirtualTransactionId *backends;
707 
708  backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
709 
710  /* Quick exit if there's no work to be done */
711  if (!VirtualTransactionIdIsValid(*backends))
712  goto cleanup;
713 
714  /*
715  * Send signals to all the backends holding the conflicting locks, to
716  * ask them to check themselves for deadlocks.
717  */
718  while (VirtualTransactionIdIsValid(*backends))
719  {
720  SignalVirtualTransaction(*backends,
722  false);
723  backends++;
724  }
725 
726  /*
727  * Exit if the recovery conflict has not been logged yet even though
728  * logging is enabled, so that the caller can log that. Then
729  * ResolveRecoveryConflictWithLock() is called again and we will wait
730  * again for the lock to be released.
731  */
732  if (logging_conflict)
733  goto cleanup;
734 
735  /*
736  * Wait again here to be signaled by the release of the Relation Lock,
737  * to prevent the subsequent ResolveRecoveryConflictWithLock() call from
738  * hitting deadlock_timeout and sending a deadlock-check request again.
739  * Otherwise the request continues to be sent every deadlock_timeout
740  * until the relation locks are released or ltime is reached.
741  */
744  }
745 
746 cleanup:
747 
748  /*
749  * Clear any timeout requests established above. We assume here that the
750  * Startup process doesn't have any other outstanding timeouts than those
751  * used by this function. If that stops being true, we could cancel the
752  * timeouts individually, but that'd be slower.
753  */
754  disable_all_timeouts(false);
755  got_standby_lock_timeout = false;
757 }
758 
759 /*
760  * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
761  * to resolve conflicts with other backends holding buffer pins.
762  *
763  * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
764  * (when not InHotStandby) is performed here, for code clarity.
765  *
766  * We either resolve conflicts immediately or set a timeout to wake us at
767  * the limit of our patience.
768  *
769  * Resolve conflicts by sending a PROCSIG signal to all backends to check if
770  * they hold one of the buffer pins that is blocking Startup process. If so,
771  * those backends will take an appropriate error action, ERROR or FATAL.
772  *
773  * We also must check for deadlocks. Deadlocks occur because if queries
774  * wait on a lock, that must be behind an AccessExclusiveLock, which can only
775  * be cleared if the Startup process replays a transaction completion record.
776  * If Startup process is also waiting then that is a deadlock. The deadlock
777  * can occur if the query is waiting and then the Startup sleeps, or if
778  * Startup is sleeping and the query waits on a lock. We protect against
779  * only the former sequence here, the latter sequence is checked prior to
780  * the query sleeping, in CheckRecoveryConflictDeadlock().
781  *
782  * Deadlocks are extremely rare, and relatively expensive to check for,
783  * so we don't do a deadlock check right away ... only if we have had to wait
784  * at least deadlock_timeout.
785  */
786 void
788 {
789  TimestampTz ltime;
790 
792 
793  ltime = GetStandbyLimitTime();
794 
795  if (GetCurrentTimestamp() >= ltime && ltime != 0)
796  {
797  /*
798  * We're already behind, so clear a path as quickly as possible.
799  */
801  }
802  else
803  {
804  /*
805  * Wake up at ltime, and check for deadlocks as well if we will be
806  * waiting longer than deadlock_timeout
807  */
808  EnableTimeoutParams timeouts[2];
809  int cnt = 0;
810 
811  if (ltime != 0)
812  {
813  timeouts[cnt].id = STANDBY_TIMEOUT;
814  timeouts[cnt].type = TMPARAM_AT;
815  timeouts[cnt].fin_time = ltime;
816  cnt++;
817  }
818 
820  timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
821  timeouts[cnt].type = TMPARAM_AFTER;
822  timeouts[cnt].delay_ms = DeadlockTimeout;
823  cnt++;
824 
825  enable_timeouts(timeouts, cnt);
826  }
827 
828  /*
829  * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
830  * by one of the timeouts established above.
831  *
832  * We assume that only UnpinBuffer() and the timeout requests established
833  * above can wake us up here. WakeupRecovery() called by walreceiver or
834  * SIGHUP signal handler, etc cannot do that because it uses the different
835  * latch from that ProcWaitForSignal() waits on.
836  */
838 
842  {
843  /*
844  * Send out a request for hot-standby backends to check themselves for
845  * deadlocks.
846  *
847  * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
848  * to be signaled by UnpinBuffer() again and send a request for
849  * deadlocks check if deadlock_timeout happens. This causes the
850  * request to continue to be sent every deadlock_timeout until the
851  * buffer is unpinned or ltime is reached. This would increase the
852  * workload in the startup process and backends. In practice it may
853  * not be so harmful because the period that the buffer is kept pinned
854  * is basically not so long. But we should fix this?
855  */
857  }
858 
859  /*
860  * Clear any timeout requests established above. We assume here that the
861  * Startup process doesn't have any other timeouts than what this function
862  * uses. If that stops being true, we could cancel the timeouts
863  * individually, but that'd be slower.
864  */
865  disable_all_timeouts(false);
868 }
869 
870 static void
872 {
875 
876  /*
877  * We send a signal to all backends to ask them if they are holding the
878  * buffer pin which is delaying the Startup process. We must not set the
879  * conflict flag yet, since most backends will be innocent. Let the
880  * SIGUSR1 handling in each backend decide their own fate.
881  */
882  CancelDBBackends(InvalidOid, reason, false);
883 }
884 
885 /*
886  * In Hot Standby perform early deadlock detection. We abort the lock
887  * wait if we are about to sleep while holding the buffer pin that Startup
888  * process is waiting for.
889  *
890  * Note: this code is pessimistic, because there is no way for it to
891  * determine whether an actual deadlock condition is present: the lock we
892  * need to wait for might be unrelated to any held by the Startup process.
893  * Sooner or later, this mechanism should get ripped out in favor of somehow
894  * accounting for buffer locks in DeadLockCheck(). However, errors here
895  * seem to be very low-probability in practice, so for now it's not worth
896  * the trouble.
897  */
898 void
900 {
901  Assert(!InRecovery); /* do not call in Startup process */
902 
904  return;
905 
906  /*
907  * Error message should match ProcessInterrupts() but we avoid calling
908  * that because we aren't handling an interrupt at this point. Note that
909  * we only cancel the current transaction here, so if we are in a
910  * subtransaction and the pin is held by a parent, then the Startup
911  * process will continue to wait even though we have avoided deadlock.
912  */
913  ereport(ERROR,
915  errmsg("canceling statement due to conflict with recovery"),
916  errdetail("User transaction caused buffer deadlock with recovery.")));
917 }
918 
919 
920 /* --------------------------------
921  * timeout handler routines
922  * --------------------------------
923  */
924 
925 /*
926  * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
927  * exceeded.
928  */
929 void
931 {
933 }
934 
935 /*
936  * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
937  */
938 void
940 {
942 }
943 
944 /*
945  * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
946  */
947 void
949 {
951 }
952 
953 /*
954  * -----------------------------------------------------
955  * Locking in Recovery Mode
956  * -----------------------------------------------------
957  *
958  * All locks are held by the Startup process using a single virtual
959  * transaction. This implementation is both simpler and in some senses,
960  * more correct. The locks held mean "some original transaction held
961  * this lock, so query access is not allowed at this time". So the Startup
962  * process is the proxy by which the original locks are implemented.
963  *
964  * We only keep track of AccessExclusiveLocks, which are only ever held by
965  * one transaction on one relation.
966  *
967  * We keep a table of known locks in the RecoveryLockHash hash table.
968  * The point of that table is to let us efficiently de-duplicate locks,
969  * which is important because checkpoints will re-report the same locks
970  * already held. There is also a RecoveryLockXidHash table with one entry
971  * per xid, which allows us to efficiently find all the locks held by a
972  * given original transaction.
973  *
974  * We use session locks rather than normal locks so we don't need
975  * ResourceOwners.
976  */
977 
978 
979 void
981 {
982  RecoveryLockXidEntry *xidentry;
983  RecoveryLockEntry *lockentry;
985  LOCKTAG locktag;
986  bool found;
987 
988  /* Already processed? */
989  if (!TransactionIdIsValid(xid) ||
990  TransactionIdDidCommit(xid) ||
992  return;
993 
995  "adding recovery lock: db %u rel %u", dbOid, relOid);
996 
997  /* dbOid is InvalidOid when we are locking a shared relation. */
998  Assert(OidIsValid(relOid));
999 
1000  /* Create a hash entry for this xid, if we don't have one already. */
1001  xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
1002  if (!found)
1003  {
1004  Assert(xidentry->xid == xid); /* dynahash should have set this */
1005  xidentry->head = NULL;
1006  }
1007 
1008  /* Create a hash entry for this lock, unless we have one already. */
1009  key.xid = xid;
1010  key.dbOid = dbOid;
1011  key.relOid = relOid;
1012  lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1013  if (!found)
1014  {
1015  /* It's new, so link it into the XID's list ... */
1016  lockentry->next = xidentry->head;
1017  xidentry->head = lockentry;
1018 
1019  /* ... and acquire the lock locally. */
1020  SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1021 
1022  (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1023  }
1024 }
1025 
1026 /*
1027  * Release all the locks associated with this RecoveryLockXidEntry.
1028  */
1029 static void
1031 {
1032  RecoveryLockEntry *entry;
1034 
1035  for (entry = xidentry->head; entry != NULL; entry = next)
1036  {
1037  LOCKTAG locktag;
1038 
1040  "releasing recovery lock: xid %u db %u rel %u",
1041  entry->key.xid, entry->key.dbOid, entry->key.relOid);
1042  /* Release the lock ... */
1043  SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
1044  if (!LockRelease(&locktag, AccessExclusiveLock, true))
1045  {
1046  elog(LOG,
1047  "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1048  entry->key.xid, entry->key.dbOid, entry->key.relOid);
1049  Assert(false);
1050  }
1051  /* ... and remove the per-lock hash entry */
1052  next = entry->next;
1053  hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL);
1054  }
1055 
1056  xidentry->head = NULL; /* just for paranoia */
1057 }
1058 
1059 /*
1060  * Release locks for specific XID, or all locks if it's InvalidXid.
1061  */
1062 static void
1064 {
1065  RecoveryLockXidEntry *entry;
1066 
1067  if (TransactionIdIsValid(xid))
1068  {
1069  if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1070  {
1073  }
1074  }
1075  else
1077 }
1078 
1079 /*
1080  * Release locks for a transaction tree, starting at xid down, from
1081  * RecoveryLockXidHash.
1082  *
1083  * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1084  * to remove any AccessExclusiveLocks requested by a transaction.
1085  */
1086 void
1088 {
1089  int i;
1090 
1091  StandbyReleaseLocks(xid);
1092 
1093  for (i = 0; i < nsubxids; i++)
1094  StandbyReleaseLocks(subxids[i]);
1095 }
1096 
1097 /*
1098  * Called at end of recovery and when we see a shutdown checkpoint.
1099  */
1100 void
1102 {
1104  RecoveryLockXidEntry *entry;
1105 
1106  elog(trace_recovery(DEBUG2), "release all standby locks");
1107 
1109  while ((entry = hash_seq_search(&status)))
1110  {
1113  }
1114 }
1115 
1116 /*
1117  * StandbyReleaseOldLocks
1118  * Release standby locks held by top-level XIDs that aren't running,
1119  * as long as they're not prepared transactions.
1120  */
1121 void
1123 {
1125  RecoveryLockXidEntry *entry;
1126 
1128  while ((entry = hash_seq_search(&status)))
1129  {
1130  Assert(TransactionIdIsValid(entry->xid));
1131 
1132  /* Skip if prepared transaction. */
1133  if (StandbyTransactionIdIsPrepared(entry->xid))
1134  continue;
1135 
1136  /* Skip if >= oldxid. */
1137  if (!TransactionIdPrecedes(entry->xid, oldxid))
1138  continue;
1139 
1140  /* Remove all locks and hash table entry. */
1143  }
1144 }
1145 
1146 /*
1147  * --------------------------------------------------------------------
1148  * Recovery handling for Rmgr RM_STANDBY_ID
1149  *
1150  * These record types will only be created if XLogStandbyInfoActive()
1151  * --------------------------------------------------------------------
1152  */
1153 
1154 void
1156 {
1157  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1158 
1159  /* Backup blocks are not used in standby records */
1160  Assert(!XLogRecHasAnyBlockRefs(record));
1161 
1162  /* Do nothing if we're not in hot standby mode */
1164  return;
1165 
1166  if (info == XLOG_STANDBY_LOCK)
1167  {
1168  xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
1169  int i;
1170 
1171  for (i = 0; i < xlrec->nlocks; i++)
1173  xlrec->locks[i].dbOid,
1174  xlrec->locks[i].relOid);
1175  }
1176  else if (info == XLOG_RUNNING_XACTS)
1177  {
1178  xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
1179  RunningTransactionsData running;
1180 
1181  running.xcnt = xlrec->xcnt;
1182  running.subxcnt = xlrec->subxcnt;
1183  running.subxid_overflow = xlrec->subxid_overflow;
1184  running.nextXid = xlrec->nextXid;
1185  running.latestCompletedXid = xlrec->latestCompletedXid;
1186  running.oldestRunningXid = xlrec->oldestRunningXid;
1187  running.xids = xlrec->xids;
1188 
1189  ProcArrayApplyRecoveryInfo(&running);
1190  }
1191  else if (info == XLOG_INVALIDATIONS)
1192  {
1193  xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);
1194 
1196  xlrec->nmsgs,
1197  xlrec->relcacheInitFileInval,
1198  xlrec->dbId,
1199  xlrec->tsId);
1200  }
1201  else
1202  elog(PANIC, "standby_redo: unknown op code %u", info);
1203 }
1204 
1205 /*
1206  * Log details of the current snapshot to WAL. This allows the snapshot state
1207  * to be reconstructed on the standby and for logical decoding.
1208  *
1209  * This is used for Hot Standby as follows:
1210  *
1211  * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1212  * start from a shutdown checkpoint because we know nothing was running
1213  * at that time and our recovery snapshot is known empty. In the more
1214  * typical case of an online checkpoint we need to jump through a few
1215  * hoops to get a correct recovery snapshot and this requires a two or
1216  * sometimes a three stage process.
1217  *
1218  * The initial snapshot must contain all running xids and all current
1219  * AccessExclusiveLocks at a point in time on the standby. Assembling
1220  * that information while the server is running requires many and
1221  * various LWLocks, so we choose to derive that information piece by
1222  * piece and then re-assemble that info on the standby. When that
1223  * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1224  *
1225  * Since locking on the primary when we derive the information is not
1226  * strict, we note that there is a time window between the derivation and
1227  * writing to WAL of the derived information. That allows race conditions
1228  * that we must resolve, since xids and locks may enter or leave the
1229  * snapshot during that window. This creates the issue that an xid or
1230  * lock may start *after* the snapshot has been derived yet *before* the
1231  * snapshot is logged in the running xacts WAL record. We resolve this by
1232  * starting to accumulate changes at a point just prior to when we derive
1233  * the snapshot on the primary, then ignore duplicates when we later apply
1234  * the snapshot from the running xacts record. This is implemented during
1235  * CreateCheckPoint() where we use the logical checkpoint location as
1236  * our starting point and then write the running xacts record immediately
1237  * before writing the main checkpoint WAL record. Since we always start
1238  * up from a checkpoint and are immediately at our starting point, we
1239  * unconditionally move to STANDBY_INITIALIZED. After this point we
1240  * must do 4 things:
1241  * * move shared nextXid forwards as we see new xids
1242  * * extend the clog and subtrans with each new xid
1243  * * keep track of uncommitted known assigned xids
1244  * * keep track of uncommitted AccessExclusiveLocks
1245  *
1246  * When we see a commit/abort we must remove known assigned xids and locks
1247  * from the completing transaction. Attempted removals that cannot locate
1248  * an entry are expected and must not cause an error when we are in state
1249  * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1250  * KnownAssignedXidsRemove().
1251  *
1252  * Later, when we apply the running xact data we must be careful to ignore
1253  * transactions already committed, since those commits raced ahead when
1254  * making WAL entries.
1255  *
1256  * The loose timing also means that locks may be recorded that have a
1257  * zero xid, since xids are removed from procs before locks are removed.
1258  * So we must prune the lock list down to ensure we hold locks only for
1259  * currently running xids, performed by StandbyReleaseOldLocks().
1260  * Zero xids should no longer be possible, but we may be replaying WAL
1261  * from a time when they were possible.
1262  *
1263  * For logical decoding only the running xacts information is needed;
1264  * there's no need to look at the locking information, but it's logged anyway,
1265  * as there's no independent knob to just enable logical decoding. For
1266  * details of how this is used, check snapbuild.c's introductory comment.
1267  *
1268  *
1269  * Returns the RecPtr of the last inserted record.
1270  */
1271 XLogRecPtr
1273 {
1274  XLogRecPtr recptr;
1275  RunningTransactions running;
1276  xl_standby_lock *locks;
1277  int nlocks;
1278 
1280 
1281  /*
1282  * Get details of any AccessExclusiveLocks being held at the moment.
1283  */
1284  locks = GetRunningTransactionLocks(&nlocks);
1285  if (nlocks > 0)
1286  LogAccessExclusiveLocks(nlocks, locks);
1287  pfree(locks);
1288 
1289  /*
1290  * Log details of all in-progress transactions. This should be the last
1291  * record we write, because standby will open up when it sees this.
1292  */
1293  running = GetRunningTransactionData();
1294 
1295  /*
1296  * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
1297  * For Hot Standby this can be done before inserting the WAL record
1298  * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
1299  * the clog. For logical decoding, though, the lock can't be released
1300  * early because the clog might be "in the future" from the POV of the
1301  * historic snapshot. This would allow for situations where we're waiting
1302  * for the end of a transaction listed in the xl_running_xacts record
1303  * which, according to the WAL, has committed before the xl_running_xacts
1304  * record. Fortunately this routine isn't executed frequently, and it's
1305  * only a shared lock.
1306  */
1308  LWLockRelease(ProcArrayLock);
1309 
1310  recptr = LogCurrentRunningXacts(running);
1311 
1312  /* Release lock if we kept it longer ... */
1314  LWLockRelease(ProcArrayLock);
1315 
1316  /* GetRunningTransactionData() acquired XidGenLock, we must release it */
1317  LWLockRelease(XidGenLock);
1318 
1319  return recptr;
1320 }
1321 
1322 /*
1323  * Record an enhanced snapshot of running transactions into WAL.
1324  *
1325  * The definitions of RunningTransactionsData and xl_running_xacts are
1326  * similar. We keep them separate because xl_running_xacts is a contiguous
1327  * chunk of memory and never exists fully until it is assembled in WAL.
1328  * The inserted records are marked as not being important for durability,
1329  * to avoid triggering superfluous checkpoint / archiving activity.
1330  */
1331 static XLogRecPtr
1333 {
1334  xl_running_xacts xlrec;
1335  XLogRecPtr recptr;
1336 
1337  xlrec.xcnt = CurrRunningXacts->xcnt;
1338  xlrec.subxcnt = CurrRunningXacts->subxcnt;
1339  xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
1340  xlrec.nextXid = CurrRunningXacts->nextXid;
1341  xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
1342  xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
1343 
1344  /* Header */
1345  XLogBeginInsert();
1347  XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
1348 
1349  /* array of TransactionIds */
1350  if (xlrec.xcnt > 0)
1351  XLogRegisterData((char *) CurrRunningXacts->xids,
1352  (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
1353 
1354  recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
1355 
1356  if (CurrRunningXacts->subxid_overflow)
1358  "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1359  CurrRunningXacts->xcnt,
1360  LSN_FORMAT_ARGS(recptr),
1361  CurrRunningXacts->oldestRunningXid,
1362  CurrRunningXacts->latestCompletedXid,
1363  CurrRunningXacts->nextXid);
1364  else
1366  "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
1367  CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
1368  LSN_FORMAT_ARGS(recptr),
1369  CurrRunningXacts->oldestRunningXid,
1370  CurrRunningXacts->latestCompletedXid,
1371  CurrRunningXacts->nextXid);
1372 
1373  /*
1374  * Ensure running_xacts information is synced to disk not too far in the
1375  * future. We don't want to stall anything though (i.e. use XLogFlush()),
1376  * so we let the wal writer do it during normal operation.
1377  * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
1378  * and nudge the WALWriter into action if sleeping. Check
1379  * XLogBackgroundFlush() for details why a record might not be flushed
1380  * without it.
1381  */
1382  XLogSetAsyncXactLSN(recptr);
1383 
1384  return recptr;
1385 }
1386 
1387 /*
1388  * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1389  * logged, as described in backend/storage/lmgr/README.
1390  */
1391 static void
1393 {
1394  xl_standby_locks xlrec;
1395 
1396  xlrec.nlocks = nlocks;
1397 
1398  XLogBeginInsert();
1399  XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
1400  XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
1402 
1403  (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
1404 }
1405 
1406 /*
1407  * Individual logging of AccessExclusiveLocks for use during LockAcquire()
1408  */
1409 void
1411 {
1412  xl_standby_lock xlrec;
1413 
1414  xlrec.xid = GetCurrentTransactionId();
1415 
1416  xlrec.dbOid = dbOid;
1417  xlrec.relOid = relOid;
1418 
1419  LogAccessExclusiveLocks(1, &xlrec);
1421 }
1422 
/*
 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Ensure that a TransactionId has been assigned to this transaction, for
	 * two reasons, both related to lock release on the standby. First, we
	 * must assign an xid so that RecordTransactionCommit() and
	 * RecordTransactionAbort() do not optimise away the transaction
	 * completion record which recovery relies upon to release locks. It's a
	 * hack, but for a corner case not worth adding code for into the main
	 * commit path. Second, we must assign an xid before the lock is recorded
	 * in shared memory, otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId which we later assert cannot happen.
	 */
	(void) GetCurrentTransactionId();
}
1443 
1444 /*
1445  * Emit WAL for invalidations. This currently is only used for commits without
1446  * an xid but which contain invalidations.
1447  */
1448 void
1450  bool relcacheInitFileInval)
1451 {
1452  xl_invalidations xlrec;
1453 
1454  /* prepare record */
1455  memset(&xlrec, 0, sizeof(xlrec));
1456  xlrec.dbId = MyDatabaseId;
1457  xlrec.tsId = MyDatabaseTableSpace;
1458  xlrec.relcacheInitFileInval = relcacheInitFileInval;
1459  xlrec.nmsgs = nmsgs;
1460 
1461  /* perform insertion */
1462  XLogBeginInsert();
1463  XLogRegisterData((char *) (&xlrec), MinSizeOfInvalidations);
1464  XLogRegisterData((char *) msgs,
1465  nmsgs * sizeof(SharedInvalidationMessage));
1466  XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
1467 }
1468 
1469 /* Return the description of recovery conflict */
1470 static const char *
1472 {
1473  const char *reasonDesc = _("unknown reason");
1474 
1475  switch (reason)
1476  {
1478  reasonDesc = _("recovery conflict on buffer pin");
1479  break;
1481  reasonDesc = _("recovery conflict on lock");
1482  break;
1484  reasonDesc = _("recovery conflict on tablespace");
1485  break;
1487  reasonDesc = _("recovery conflict on snapshot");
1488  break;
1490  reasonDesc = _("recovery conflict on buffer deadlock");
1491  break;
1493  reasonDesc = _("recovery conflict on database");
1494  break;
1495  default:
1496  break;
1497  }
1498 
1499  return reasonDesc;
1500 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:433
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:424
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1664
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1719
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1573
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1537
static int32 next
Definition: blutils.c:219
static void cleanup(void)
Definition: bootstrap.c:696
bool HoldingBufferPinThatDelaysRecovery(void)
Definition: bufmgr.c:4380
unsigned int uint32
Definition: c.h:442
unsigned char uint8
Definition: c.h:440
uint32 TransactionId
Definition: c.h:588
#define OidIsValid(objectId)
Definition: c.h:711
int64 TimestampTz
Definition: timestamp.h:39
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:863
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:953
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:350
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1431
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1421
int errdetail(const char *fmt,...)
Definition: elog.c:1039
int errcode(int sqlerrcode)
Definition: elog.c:695
int errdetail_log_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1108
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define _(x)
Definition: elog.c:90
int trace_recovery(int trace_level)
Definition: elog.c:3601
#define LOG
Definition: elog.h:27
#define DEBUG2
Definition: elog.h:25
#define PANIC
Definition: elog.h:38
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
#define DEBUG4
Definition: elog.h:23
BackendId MyBackendId
Definition: globals.c:85
Oid MyDatabaseTableSpace
Definition: globals.c:91
Oid MyDatabaseId
Definition: globals.c:89
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid)
Definition: inval.c:963
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
LockAcquireResult LockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait)
Definition: lock.c:747
VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
Definition: lock.c:2914
void VirtualXactLockTableInsert(VirtualTransactionId vxid)
Definition: lock.c:4476
xl_standby_lock * GetRunningTransactionLocks(int *nlocks)
Definition: lock.c:4018
bool LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
Definition: lock.c:1975
void VirtualXactLockTableCleanup(void)
Definition: lock.c:4499
bool VirtualXactLock(VirtualTransactionId vxid, bool wait)
Definition: lock.c:4599
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:72
#define SET_LOCKTAG_RELATION(locktag, dboid, reloid)
Definition: lock.h:184
#define AccessExclusiveLock
Definition: lockdefs.h:43
struct xl_standby_lock xl_standby_lock
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1802
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc(Size size)
Definition: mcxt.c:1199
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
const void size_t len
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
static char * buf
Definition: pg_test_fsync.c:67
#define ERRCODE_T_R_DEADLOCK_DETECTED
Definition: pgbench.c:76
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3463
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4487
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3383
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2729
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3626
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3457
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3565
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1056
ProcSignalReason
Definition: procsignal.h:31
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:44
@ PROCSIG_RECOVERY_CONFLICT_LOCK
Definition: procsignal.h:42
@ PROCSIG_RECOVERY_CONFLICT_DATABASE
Definition: procsignal.h:40
@ PROCSIG_RECOVERY_CONFLICT_SNAPSHOT
Definition: procsignal.h:43
@ PROCSIG_RECOVERY_CONFLICT_TABLESPACE
Definition: procsignal.h:41
@ PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK
Definition: procsignal.h:45
const char * get_ps_display(int *displen)
Definition: ps_status.c:414
bool update_process_title
Definition: ps_status.c:35
void set_ps_display(const char *activity)
Definition: ps_status.c:342
void pg_usleep(long microsec)
Definition: signal.c:53
void SharedInvalBackendInit(bool sendOnly)
Definition: sinvaladt.c:266
LocalTransactionId GetNextLocalTransactionId(void)
Definition: sinvaladt.c:775
PGPROC * BackendIdGetProc(int backendID)
Definition: sinvaladt.c:385
PGPROC * MyProc
Definition: proc.c:68
int DeadlockTimeout
Definition: proc.c:60
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1878
void standby_redo(XLogReaderState *record)
Definition: standby.c:1155
static bool WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
Definition: standby.c:234
static volatile sig_atomic_t got_standby_deadlock_timeout
Definition: standby.c:70
static TimestampTz GetStandbyLimitTime(void)
Definition: standby.c:201
void StandbyTimeoutHandler(void)
Definition: standby.c:939
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:787
static volatile sig_atomic_t got_standby_delay_timeout
Definition: standby.c:71
int vacuum_defer_cleanup_age
Definition: standby.c:40
void StandbyLockTimeoutHandler(void)
Definition: standby.c:948
static int standbyWait_us
Definition: standby.c:226
static void StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
Definition: standby.c:1030
void StandbyDeadLockHandler(void)
Definition: standby.c:930
static HTAB * RecoveryLockXidHash
Definition: standby.c:67
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1272
struct RecoveryLockEntry RecoveryLockEntry
void CheckRecoveryConflictDeadlock(void)
Definition: standby.c:899
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:96
void ResolveRecoveryConflictWithTablespace(Oid tsid)
Definition: standby.c:533
static const char * get_recovery_conflict_desc(ProcSignalReason reason)
Definition: standby.c:1471
bool log_recovery_conflict_waits
Definition: standby.c:43
#define STANDBY_INITIAL_WAIT_US
Definition: standby.c:225
void ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon, RelFileLocator locator)
Definition: standby.c:509
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ProcSignalReason reason, uint32 wait_event_info, bool report_waiting)
Definition: standby.c:360
static volatile sig_atomic_t got_standby_lock_timeout
Definition: standby.c:72
void ResolveRecoveryConflictWithDatabase(Oid dbid)
Definition: standby.c:563
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: standby.c:1087
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1122
struct RecoveryLockXidEntry RecoveryLockXidEntry
void ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, RelFileLocator locator)
Definition: standby.c:477
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
Definition: standby.c:871
void LogAccessExclusiveLockPrepare(void)
Definition: standby.c:1427
static HTAB * RecoveryLockHash
Definition: standby.c:66
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
Definition: standby.c:1392
void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInitFileInval)
Definition: standby.c:1449
void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
Definition: standby.c:980
static void StandbyReleaseLocks(TransactionId xid)
Definition: standby.c:1063
void LogAccessExclusiveLock(Oid dbOid, Oid relOid)
Definition: standby.c:1410
int max_standby_archive_delay
Definition: standby.c:41
void StandbyReleaseAllLocks(void)
Definition: standby.c:1101
int max_standby_streaming_delay
Definition: standby.c:42
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
Definition: standby.c:1332
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
Definition: standby.c:617
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:161
#define MinSizeOfXactRunningXacts
Definition: standby.h:62
#define XLOG_INVALIDATIONS
Definition: standbydefs.h:36
#define MinSizeOfInvalidations
Definition: standbydefs.h:72
#define XLOG_STANDBY_LOCK
Definition: standbydefs.h:34
#define XLOG_RUNNING_XACTS
Definition: standbydefs.h:35
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
TimeoutType type
Definition: timeout.h:60
TimestampTz fin_time
Definition: timeout.h:62
TimeoutId id
Definition: timeout.h:59
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
Definition: lock.h:168
uint8 locktag_type
Definition: lock.h:173
Definition: proc.h:162
pg_atomic_uint64 waitStart
Definition: proc.h:228
int pid
Definition: proc.h:186
struct RecoveryLockEntry * next
Definition: standby.c:57
xl_standby_lock key
Definition: standby.c:56
TransactionId xid
Definition: standby.c:62
struct RecoveryLockEntry * head
Definition: standby.c:63
TransactionId oldestRunningXid
Definition: standby.h:83
TransactionId nextXid
Definition: standby.h:82
TransactionId latestCompletedXid
Definition: standby.h:84
TransactionId * xids
Definition: standby.h:86
LocalTransactionId localTransactionId
Definition: lock.h:67
BackendId backendId
Definition: lock.h:66
SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:69
bool relcacheInitFileInval
Definition: standbydefs.h:67
TransactionId latestCompletedXid
Definition: standbydefs.h:54
TransactionId oldestRunningXid
Definition: standbydefs.h:53
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
TransactionId nextXid
Definition: standbydefs.h:52
TransactionId xid
Definition: lockdefs.h:51
xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:41
void disable_all_timeouts(bool keep_indicators)
Definition: timeout.c:755
void enable_timeouts(const EnableTimeoutParams *timeouts, int count)
Definition: timeout.c:634
@ STANDBY_LOCK_TIMEOUT
Definition: timeout.h:32
@ STANDBY_DEADLOCK_TIMEOUT
Definition: timeout.h:30
@ STANDBY_TIMEOUT
Definition: timeout.h:31
@ TMPARAM_AT
Definition: timeout.h:53
@ TMPARAM_AFTER
Definition: timeout.h:52
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:273
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
#define InvalidTransactionId
Definition: transam.h:31
#define U64FromFullTransactionId(x)
Definition: transam.h:49
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define MaxTransactionId
Definition: transam.h:35
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1452
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:84
FullTransactionId ReadNextFullTransactionId(void)
Definition: varsup.c:261
@ WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE
Definition: wait_event.h:121
@ WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT
Definition: wait_event.h:120
#define PG_WAIT_BUFFER_PIN
Definition: wait_event.h:20
#define PG_WAIT_LOCK
Definition: wait_event.h:19
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:268
static void pgstat_report_wait_end(void)
Definition: wait_event.h:284
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:444
int MyXactFlags
Definition: xact.c:135
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition: xact.h:108
int wal_level
Definition: xlog.c:134
void XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
Definition: xlog.c:2357
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:150
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:71
#define XLogStandbyInfoActive()
Definition: xlog.h:118
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:451
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:433
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:411
#define XLogRecGetData(decoder)
Definition: xlogreader.h:416
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:418
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
HotStandbyState standbyState
Definition: xlogutils.c:56
bool InRecovery
Definition: xlogutils.c:53
@ STANDBY_DISABLED
Definition: xlogutils.h:49
@ STANDBY_INITIALIZED
Definition: xlogutils.h:50
#define InHotStandby
Definition: xlogutils.h:57