PostgreSQL Source Code  git master
procarray.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  * POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of the PGPROC and PGXACT structures for all
8  * active backends. Although there are several uses for this, the principal
9  * one is as a means of determining the set of currently running transactions.
10  *
11  * Because of various subtle race conditions it is critical that a backend
12  * hold the correct locks while setting or clearing its MyPgXact->xid field.
13  * See notes in src/backend/access/transam/README.
14  *
15  * The process arrays now also include structures representing prepared
16  * transactions. The xid and subxids fields of these are valid, as are the
17  * myProcLocks lists. They can be distinguished from regular backend PGPROCs
18  * at need by checking for pid == 0.
19  *
20  * During hot standby, we also keep a list of XIDs representing transactions
21  * that are known to be running in the master (or more precisely, were running
22  * as of the current point in the WAL stream). This list is kept in the
23  * KnownAssignedXids array, and is updated by watching the sequence of
24  * arriving XIDs. This is necessary because if we leave those XIDs out of
25  * snapshots taken for standby queries, then they will appear to be already
26  * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
27  * array represents standby processes, which by definition are not running
28  * transactions that have XIDs.
29  *
30  * It is perhaps possible for a backend on the master to terminate without
31  * writing an abort record for its transaction. While that shouldn't really
32  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
33  * ourselves by pruning the array when a valid list of running XIDs arrives.
34  *
35  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/storage/ipc/procarray.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <signal.h>
47 
48 #include "access/clog.h"
49 #include "access/subtrans.h"
50 #include "access/transam.h"
51 #include "access/twophase.h"
52 #include "access/xact.h"
53 #include "access/xlog.h"
54 #include "catalog/catalog.h"
55 #include "catalog/pg_authid.h"
56 #include "commands/dbcommands.h"
57 #include "miscadmin.h"
58 #include "pgstat.h"
59 #include "storage/proc.h"
60 #include "storage/procarray.h"
61 #include "storage/spin.h"
62 #include "utils/acl.h"
63 #include "utils/builtins.h"
64 #include "utils/rel.h"
65 #include "utils/snapmgr.h"
66 
/*
 * Fetch var as a uint32 through a volatile-qualified pointer, so that each
 * use of the macro performs its own read of the variable.  Used below where
 * an xid must be fetched "just once".
 */
67 #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
68 
69 /* Our shared memory area */
70 typedef struct ProcArrayStruct
71 {
     /*
      * NOTE(review): this listing is incomplete.  The field declarations that
      * the last three comments below describe (original lines 91, 94, 96 and
      * the lines after 98, including the closing brace) are not present here.
      * Code later in the file reads procArray->lastOverflowedXid and
      * arrayP->pgprocnos[], so those members presumably live on the missing
      * lines -- confirm against the real source before editing this struct.
      */
72  int numProcs; /* number of valid procs entries */
73  int maxProcs; /* allocated size of procs array */
74 
75  /*
76  * Known assigned XIDs handling
77  */
78  int maxKnownAssignedXids; /* allocated size of array */
79  int numKnownAssignedXids; /* current # of valid entries */
80  int tailKnownAssignedXids; /* index of oldest valid element */
81  int headKnownAssignedXids; /* index of newest element, + 1 */
82  slock_t known_assigned_xids_lck; /* protects head/tail pointers */
83 
84  /*
85  * Highest subxid that has been removed from KnownAssignedXids array to
86  * prevent overflow; or InvalidTransactionId if none. We track this for
87  * similar reasons to tracking overflowing cached subxids in PGXACT
88  * entries. Must hold exclusive ProcArrayLock to change this, and shared
89  * lock to read it.
90  */
92 
93  /* oldest xmin of any replication slot */
95  /* oldest catalog xmin of any replication slot */
97 
98  /* indexes into allPgXact[], has PROCARRAY_MAXPROCS entries */
101 
103 
/*
 * Local pointer to the PGPROC array; assigned from ProcGlobal->allProcs when
 * the shared proc array is created/attached, and indexed by pgprocno below.
 */
104 static PGPROC *allProcs;
106 
107 /*
108  * Bookkeeping for tracking emulated transactions in recovery
109  */
113 
114 /*
115  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
116  * the highest xid that might still be running that we don't have in
117  * KnownAssignedXids.
118  */
120 
/*
 * XidCache instrumentation.  With XIDCACHE_DEBUG defined, each xc_*_inc()
 * macro bumps the matching file-local counter; otherwise every macro expands
 * to a no-op expression, so call sites need no #ifdef of their own.
 */
121 #ifdef XIDCACHE_DEBUG
122 
123 /* counters for XidCache measurement */
124 static long xc_by_recent_xmin = 0;
125 static long xc_by_known_xact = 0;
126 static long xc_by_my_xact = 0;
127 static long xc_by_latest_xid = 0;
128 static long xc_by_main_xid = 0;
129 static long xc_by_child_xid = 0;
130 static long xc_by_known_assigned = 0;
131 static long xc_no_overflow = 0;
132 static long xc_slow_answer = 0;
133 
134 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
135 #define xc_by_known_xact_inc() (xc_by_known_xact++)
136 #define xc_by_my_xact_inc() (xc_by_my_xact++)
137 #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
138 #define xc_by_main_xid_inc() (xc_by_main_xid++)
139 #define xc_by_child_xid_inc() (xc_by_child_xid++)
140 #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
141 #define xc_no_overflow_inc() (xc_no_overflow++)
142 #define xc_slow_answer_inc() (xc_slow_answer++)
143 
     /* Dumps the counters; called from the proc-removal path for real backends */
144 static void DisplayXidCache(void);
145 #else /* !XIDCACHE_DEBUG */
146 
147 #define xc_by_recent_xmin_inc() ((void) 0)
148 #define xc_by_known_xact_inc() ((void) 0)
149 #define xc_by_my_xact_inc() ((void) 0)
150 #define xc_by_latest_xid_inc() ((void) 0)
151 #define xc_by_main_xid_inc() ((void) 0)
152 #define xc_by_child_xid_inc() ((void) 0)
153 #define xc_by_known_assigned_inc() ((void) 0)
154 #define xc_no_overflow_inc() ((void) 0)
155 #define xc_slow_answer_inc() ((void) 0)
156 #endif /* XIDCACHE_DEBUG */
157 
158 /* Primitives for KnownAssignedXids array handling for standby */
/*
 * NOTE(review): original lines 167, 169 and 172 are absent from this listing.
 * The two parameter lines numbered 170-171 therefore have no visible
 * declarator; they presumably belong to a prototype that began on line 169.
 * Confirm against the real source before touching these declarations.
 */
159 static void KnownAssignedXidsCompress(bool force);
160 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
161  bool exclusive_lock);
162 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
163 static bool KnownAssignedXidExists(TransactionId xid);
164 static void KnownAssignedXidsRemove(TransactionId xid);
165 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
166  TransactionId *subxids);
168 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
170  TransactionId *xmin,
171  TransactionId xmax);
173 static void KnownAssignedXidsDisplay(int trace_level);
174 static void KnownAssignedXidsReset(void);
/* Helpers for clearing a finished transaction's XID; both are defined below */
175 static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
176  PGXACT *pgxact, TransactionId latestXid);
177 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
178 
179 /*
180  * Report shared-memory space needed by CreateSharedProcArray.
181  */
182 Size
184 {
     /*
      * NOTE(review): the declarator (original line 183) and lines 190 and 213
      * are absent from this listing.  Line 190 presumably gives "size" its
      * initial value before the add_size() call below -- confirm.
      *
      * Returns the accumulated byte count in "size".
      */
185  Size size;
186 
187  /* Size of the ProcArray structure itself */
     /*
      * PROCARRAY_MAXPROCS is defined here, inside the function body, but as a
      * preprocessor macro it stays visible to the rest of the file; the
      * initialization code below relies on that.
      */
188 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
189 
     /* plus one int for each of the PROCARRAY_MAXPROCS possible entries */
191  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
192 
193  /*
194  * During Hot Standby processing we have a data structure called
195  * KnownAssignedXids, created in shared memory. Local data structures are
196  * also created in various backends during GetSnapshotData(),
197  * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
198  * main structures created in those functions must be identically sized,
199  * since we may at times copy the whole of the data structures around. We
200  * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
201  *
202  * Ideally we'd only create this structure if we were actually doing hot
203  * standby in the current run, but we don't know that yet at the time
204  * shared memory is being set up.
205  */
206 #define TOTAL_MAX_CACHED_SUBXIDS \
207  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
208 
     /* The hot-standby arrays are only counted when EnableHotStandby is set */
209  if (EnableHotStandby)
210  {
211  size = add_size(size,
212  mul_size(sizeof(TransactionId),
     /* one bool per TOTAL_MAX_CACHED_SUBXIDS slot */
214  size = add_size(size,
215  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
216  }
217 
218  return size;
219 }
220 
221 /*
222  * Initialize the shared PGPROC array during postmaster startup.
223  */
224 void
226 {
     /*
      * NOTE(review): the declarator (original line 225) and lines 232, 234,
      * 244, 248-251, 260 and 263 are absent from this listing, so some of the
      * ShmemInitStruct() argument lists and some initializations below are
      * only partially visible.
      *
      * "found" is the out-parameter of each ShmemInitStruct() call; the
      * counters are initialized only on the !found path.
      */
227  bool found;
228 
229  /* Create or attach to the ProcArray shared structure */
230  procArray = (ProcArrayStruct *)
231  ShmemInitStruct("Proc Array",
233  mul_size(sizeof(int),
235  &found);
236 
237  if (!found)
238  {
239  /*
240  * We're the first - initialize.
241  */
242  procArray->numProcs = 0;
243  procArray->maxProcs = PROCARRAY_MAXPROCS;
245  procArray->numKnownAssignedXids = 0;
246  procArray->tailKnownAssignedXids = 0;
247  procArray->headKnownAssignedXids = 0;
252  }
253 
     /* Set on every call, whether or not the structure already existed */
254  allProcs = ProcGlobal->allProcs;
255  allPgXact = ProcGlobal->allPgXact;
256 
257  /* Create or attach to the KnownAssignedXids arrays too, if needed */
258  if (EnableHotStandby)
259  {
261  ShmemInitStruct("KnownAssignedXids",
262  mul_size(sizeof(TransactionId),
264  &found);
265  KnownAssignedXidsValid = (bool *)
266  ShmemInitStruct("KnownAssignedXidsValid",
267  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
268  &found);
269  }
270 }
271 
272 /*
273  * Add the specified PGPROC to the shared array.
274  */
275 void
277 {
     /*
      * NOTE(review): the declarator (original line 276) is absent from this
      * listing; the body uses a parameter named "proc" and reads
      * proc->pgprocno.
      *
      * The whole insertion runs under ProcArrayLock held exclusively.
      */
278  ProcArrayStruct *arrayP = procArray;
279  int index;
280 
281  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
282 
283  if (arrayP->numProcs >= arrayP->maxProcs)
284  {
285  /*
286  * Oops, no room. (This really shouldn't happen, since there is a
287  * fixed supply of PGPROC structs too, and so we should have failed
288  * earlier.)
289  */
     /* Drop the lock before reporting the FATAL error */
290  LWLockRelease(ProcArrayLock);
291  ereport(FATAL,
292  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
293  errmsg("sorry, too many clients already")));
294  }
295 
296  /*
297  * Keep the procs array sorted by (PGPROC *) so that we can utilize
298  * locality of references much better. This is useful while traversing the
299  * ProcArray because there is an increased likelihood of finding the next
300  * PGPROC structure in the cache.
301  *
302  * Since the occurrence of adding/removing a proc is much lower than the
303  * access to the ProcArray itself, the overhead should be marginal
304  */
     /*
      * The array stores pgprocno values, so the ordering test below compares
      * pgprocnos.  The scan stops at the first slot holding -1 or a larger
      * pgprocno; if none is found, index ends up equal to numProcs.
      */
305  for (index = 0; index < arrayP->numProcs; index++)
306  {
307  /*
308  * If we are the first PGPROC or if we have found our right position
309  * in the array, break
310  */
311  if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
312  break;
313  }
314 
     /*
      * Shift entries [index, numProcs) up by one slot to open a gap (zero
      * entries are moved when index == numProcs), then store our pgprocno.
      */
315  memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
316  (arrayP->numProcs - index) * sizeof(int));
317  arrayP->pgprocnos[index] = proc->pgprocno;
318  arrayP->numProcs++;
319 
320  LWLockRelease(ProcArrayLock);
321 }
322 
323 /*
324  * Remove the specified PGPROC from the shared array.
325  *
326  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
327  * array, and thus causing it to appear as "not running" anymore. In this
328  * case we must advance latestCompletedXid. (This is essentially the same
329  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
330  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
331  * twophase.c depends on the latter.)
332  */
333 void
335 {
     /*
      * NOTE(review): the declarator (original line 334) and lines 352 and 354
      * are absent from this listing.  The body uses parameters named "proc"
      * and "latestXid"; the statement that advances latestCompletedXid is
      * only partially visible.
      */
336  ProcArrayStruct *arrayP = procArray;
337  int index;
338 
339 #ifdef XIDCACHE_DEBUG
340  /* dump stats at backend shutdown, but not prepared-xact end */
341  if (proc->pid != 0)
342  DisplayXidCache();
343 #endif
344 
345  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
346 
347  if (TransactionIdIsValid(latestXid))
348  {
349  Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
350 
351  /* Advance global latestCompletedXid while holding the lock */
353  latestXid))
355  }
356  else
357  {
358  /* Shouldn't be trying to remove a live transaction here */
359  Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
360  }
361 
     /* Linear search for the slot that holds this proc's pgprocno */
362  for (index = 0; index < arrayP->numProcs; index++)
363  {
364  if (arrayP->pgprocnos[index] == proc->pgprocno)
365  {
366  /* Keep the PGPROC array sorted. See notes above */
     /* Slide later entries down one slot, then mark the freed last slot -1 */
367  memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
368  (arrayP->numProcs - index - 1) * sizeof(int));
369  arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
370  arrayP->numProcs--;
371  LWLockRelease(ProcArrayLock);
372  return;
373  }
374  }
375 
376  /* Oops */
     /* Not found: release the lock and log at LOG level without raising an error */
377  LWLockRelease(ProcArrayLock);
378 
379  elog(LOG, "failed to find proc %p in ProcArray", proc);
380 }
381 
382 
383 /*
384  * ProcArrayEndTransaction -- mark a transaction as no longer running
385  *
386  * This is used interchangeably for commit and abort cases. The transaction
387  * commit/abort must already be reported to WAL and pg_xact.
388  *
389  * proc is currently always MyProc, but we pass it explicitly for flexibility.
390  * latestXid is the latest Xid among the transaction's main XID and
391  * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
392  * the caller to pass latestXid, instead of computing it from the PGPROC's
393  * contents, because the subxid information in the PGPROC might be
394  * incomplete.)
395  */
396 void
398 {
     /*
      * NOTE(review): the declarator (original line 397) and lines 433 and 436
      * are absent from this listing; the no-XID branch below therefore shows
      * only part of the fields it clears.
      *
      * pgxact is the PGXACT entry at this proc's pgprocno index.
      */
399  PGXACT *pgxact = &allPgXact[proc->pgprocno];
400 
401  if (TransactionIdIsValid(latestXid))
402  {
403  /*
404  * We must lock ProcArrayLock while clearing our advertised XID, so
405  * that we do not exit the set of "running" transactions while someone
406  * else is taking a snapshot. See discussion in
407  * src/backend/access/transam/README.
408  */
409  Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
410 
411  /*
412  * If we can immediately acquire ProcArrayLock, we clear our own XID
413  * and release the lock. If not, use group XID clearing to improve
414  * efficiency.
415  */
416  if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
417  {
418  ProcArrayEndTransactionInternal(proc, pgxact, latestXid);
419  LWLockRelease(ProcArrayLock);
420  }
421  else
422  ProcArrayGroupClearXid(proc, latestXid);
423  }
424  else
425  {
426  /*
427  * If we have no XID, we don't need to lock, since we won't affect
428  * anyone else's calculation of a snapshot. We might change their
429  * estimate of global xmin, but that's OK.
430  */
431  Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
432 
434  pgxact->xmin = InvalidTransactionId;
435  /* must be cleared with xid/xmin: */
437  proc->delayChkpt = false; /* be sure this is cleared in abort */
438  proc->recoveryConflictPending = false;
439 
     /* With no XID, the subxid cache is expected to be empty already */
440  Assert(pgxact->nxids == 0);
441  Assert(pgxact->overflowed == false);
442  }
443 }
444 
445 /*
446  * Mark a write transaction as no longer running.
447  *
448  * We don't do any locking here; caller must handle that.
449  */
450 static inline void
452  TransactionId latestXid)
453 {
     /*
      * NOTE(review): original lines 451, 455, 458, 467 and 469 are absent
      * from this listing, so some of the fields reset here and the statement
      * that advances latestCompletedXid are only partially visible.
      *
      * Both visible call sites invoke this with ProcArrayLock held
      * exclusively.
      */
454  pgxact->xid = InvalidTransactionId;
456  pgxact->xmin = InvalidTransactionId;
457  /* must be cleared with xid/xmin: */
459  proc->delayChkpt = false; /* be sure this is cleared in abort */
460  proc->recoveryConflictPending = false;
461 
462  /* Clear the subtransaction-XID cache too while holding the lock */
463  pgxact->nxids = 0;
464  pgxact->overflowed = false;
465 
466  /* Also advance global latestCompletedXid while holding the lock */
468  latestXid))
470 }
471 
472 /*
473  * ProcArrayGroupClearXid -- group XID clearing
474  *
475  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
476  * commit time, add ourselves to a list of processes that need their XIDs
477  * cleared. The first process to add itself to the list will acquire
478  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
479  * on behalf of all group members. This avoids a great deal of contention
480  * around ProcArrayLock when many processes are trying to commit at once,
481  * since the lock need not be repeatedly handed off from one committing
482  * process to the next.
483  */
484 static void
486 {
     /*
      * NOTE(review): the declarator (original line 485) and lines 502, 519,
      * 528, 530, 547, 558, 579 and 582 are absent from this listing.  In
      * particular the call that links this proc into the list (line 502),
      * the per-member clearing call inside the leader's first loop (line
      * 558) and the barrier named by the comment near line 581 are not
      * visible.
      *
      * nextidx: head of the pending list as last observed.
      * wakeidx: saved head used for the wakeup pass after the lock is dropped.
      */
487  PROC_HDR *procglobal = ProcGlobal;
488  uint32 nextidx;
489  uint32 wakeidx;
490 
491  /* We should definitely have an XID to clear. */
492  Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
493 
494  /* Add ourselves to the list of processes needing a group XID clear. */
495  proc->procArrayGroupMember = true;
496  proc->procArrayGroupMemberXid = latestXid;
497  nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
     /*
      * Retry loop: point our "next" link at the observed head, then try to
      * install ourselves as the new head.  NOTE(review): the call on the
      * missing line 502 is presumably a compare-and-exchange that refreshes
      * nextidx when it fails -- confirm.
      */
498  while (true)
499  {
500  pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
501 
503  &nextidx,
504  (uint32) proc->pgprocno))
505  break;
506  }
507 
508  /*
509  * If the list was not empty, the leader will clear our XID. It is
510  * impossible to have followers without a leader because the first process
511  * that has added itself to the list will always have nextidx as
512  * INVALID_PGPROCNO.
513  */
514  if (nextidx != INVALID_PGPROCNO)
515  {
     /* Counts semaphore acquisitions that were not the leader's wakeup */
516  int extraWaits = 0;
517 
518  /* Sleep until the leader clears our XID. */
520  for (;;)
521  {
522  /* acts as a read barrier */
523  PGSemaphoreLock(proc->sem);
524  if (!proc->procArrayGroupMember)
525  break;
526  extraWaits++;
527  }
529 
531 
532  /* Fix semaphore count for any absorbed wakeups */
533  while (extraWaits-- > 0)
534  PGSemaphoreUnlock(proc->sem);
535  return;
536  }
537 
538  /* We are the leader. Acquire the lock on behalf of everyone. */
539  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
540 
541  /*
542  * Now that we've got the lock, clear the list of processes waiting for
543  * group XID clearing, saving a pointer to the head of the list. Trying
544  * to pop elements one at a time could lead to an ABA problem.
545  */
546  nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
548 
549  /* Remember head of list so we can perform wakeups after dropping lock. */
550  wakeidx = nextidx;
551 
552  /* Walk the list and clear all XIDs. */
553  while (nextidx != INVALID_PGPROCNO)
554  {
     /* Note: this local "proc" shadows the function's proc parameter */
555  PGPROC *proc = &allProcs[nextidx];
556  PGXACT *pgxact = &allPgXact[nextidx];
557 
559 
560  /* Move to next proc in list. */
561  nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
562  }
563 
564  /* We're done with the lock now. */
565  LWLockRelease(ProcArrayLock);
566 
567  /*
568  * Now that we've released the lock, go back and wake everybody up. We
569  * don't do this under the lock so as to keep lock hold times to a
570  * minimum. The system calls we need to perform to wake other processes
571  * up are probably much slower than the simple memory writes we did while
572  * holding the lock.
573  */
574  while (wakeidx != INVALID_PGPROCNO)
575  {
     /* Shadows the parameter again; the next link is read before the flag is cleared */
576  PGPROC *proc = &allProcs[wakeidx];
577 
578  wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
580 
581  /* ensure all previous writes are visible before follower continues. */
583 
584  proc->procArrayGroupMember = false;
585 
     /* The leader's own entry is in the list too; it has no need to wake itself */
586  if (proc != MyProc)
587  PGSemaphoreUnlock(proc->sem);
588  }
589 }
590 
591 /*
592  * ProcArrayClearTransaction -- clear the transaction fields
593  *
594  * This is used after successfully preparing a 2-phase transaction. We are
595  * not actually reporting the transaction's XID as no longer running --- it
596  * will still appear as running because the 2PC's gxact is in the ProcArray
597  * too. We just have to clear out our own PGXACT.
598  */
599 void
601 {
     /*
      * NOTE(review): the declarator (original line 600) and lines 611 and 616
      * are absent from this listing, so not every field cleared here is
      * visible.
      *
      * pgxact is the PGXACT entry at this proc's pgprocno index.
      */
602  PGXACT *pgxact = &allPgXact[proc->pgprocno];
603 
604  /*
605  * We can skip locking ProcArrayLock here, because this action does not
606  * actually change anyone's view of the set of running XIDs: our entry is
607  * duplicate with the gxact that has already been inserted into the
608  * ProcArray.
609  */
610  pgxact->xid = InvalidTransactionId;
612  pgxact->xmin = InvalidTransactionId;
613  proc->recoveryConflictPending = false;
614 
615  /* redundant, but just in case */
617  proc->delayChkpt = false;
618 
619  /* Clear the subtransaction-XID cache too */
620  pgxact->nxids = 0;
621  pgxact->overflowed = false;
622 }
623 
624 /*
625  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
626  *
627  * Remember up to where the startup process initialized the CLOG and subtrans
628  * so we can ensure it's initialized gaplessly up to the point where necessary
629  * while in recovery.
630  */
631 void
633 {
     /*
      * NOTE(review): the declarator (original line 632) and lines 634 and 644
      * are absent from this listing.  The body uses a parameter named
      * initializedUptoXID, which is asserted to be a normal XID.
      */
635  Assert(TransactionIdIsNormal(initializedUptoXID));
636 
637  /*
638  * we set latestObservedXid to the xid SUBTRANS has been initialized up
639  * to, so we can extend it from that point onwards in
640  * RecordKnownAssignedTransactionIds, and when we get consistent in
641  * ProcArrayApplyRecoveryInfo().
642  */
643  latestObservedXid = initializedUptoXID;
645 }
646 
647 /*
648  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
649  *
650  * Takes us through 3 states: Initialized, Pending and Ready.
651  * Normal case is to go all the way to Ready straight away, though there
652  * are atypical cases where we need to take it in steps.
653  *
654  * Use the data about running transactions on master to create the initial
655  * state of KnownAssignedXids. We also use these records to regularly prune
656  * KnownAssignedXids because we know it is possible that some transactions
657  * with FATAL errors fail to write abort records, which could cause eventual
658  * overflow.
659  *
660  * See comments for LogStandbySnapshot().
661  */
662 void
664 {
     /*
      * NOTE(review): the declarator (original line 663) and many statement
      * lines are absent from this listing (the embedded line numbers skip
      * wherever that happens).  Several conditions, calls and assignments
      * below are therefore only partially visible; the added comments
      * describe only what can be seen.  The body uses a parameter named
      * "running".
      *
      * xids:  temporary palloc'd copy of the still-relevant running xids.
      * nxids: number of entries stored in xids.
      */
665  TransactionId *xids;
666  int nxids;
667  int i;
668 
673 
674  /*
675  * Remove stale transactions, if any.
676  */
678 
679  /*
680  * Remove stale locks, if any.
681  */
683 
684  /*
685  * If our snapshot is already valid, nothing else to do...
686  */
688  return;
689 
690  /*
691  * If our initial RunningTransactionsData had an overflowed snapshot then
692  * we knew we were missing some subxids from our snapshot. If we continue
693  * to see overflowed snapshots then we might never be able to start up, so
694  * we make another test to see if our snapshot is now valid. We know that
695  * the missing subxids are equal to or earlier than nextXid. After we
696  * initialise we continue to apply changes during recovery, so once the
697  * oldestRunningXid is later than the nextXid from the initial snapshot we
698  * know that we no longer have missing information and can mark the
699  * snapshot as valid.
700  */
702  {
703  /*
704  * If the snapshot isn't overflowed or if it is empty we can reset our
705  * pending state and use this snapshot instead.
706  */
707  if (!running->subxid_overflow || running->xcnt == 0)
708  {
709  /*
710  * If we have already collected known assigned xids, we need to
711  * throw them away before we apply the recovery snapshot.
712  */
715  }
716  else
717  {
719  running->oldestRunningXid))
720  {
723  "recovery snapshots are now enabled");
724  }
725  else
727  "recovery snapshot waiting for non-overflowed snapshot or "
728  "until oldest active xid on standby is at least %u (now %u)",
730  running->oldestRunningXid);
731  return;
732  }
733  }
734 
736 
737  /*
738  * NB: this can be reached at least twice, so make sure new code can deal
739  * with that.
740  */
741 
742  /*
743  * Nobody else is running yet, but take locks anyhow
744  */
745  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
746 
747  /*
748  * KnownAssignedXids is sorted so we cannot just add the xids, we have to
749  * sort them first.
750  *
751  * Some of the new xids are top-level xids and some are subtransactions.
752  * We don't call SubTransSetParent because it doesn't matter yet. If we
753  * aren't overflowed then all xids will fit in snapshot and so we don't
754  * need subtrans. If we later overflow, an xid assignment record will add
755  * xids to subtrans. If RunningTransactionsData is overflowed then we
756  * don't have enough information to correctly update subtrans anyway.
757  */
758 
759  /*
760  * Allocate a temporary array to avoid modifying the array passed as
761  * argument.
762  */
     /* Sized for every top-level xid plus every subxid in "running" */
763  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
764 
765  /*
766  * Add to the temp array any xids which have not already completed.
767  */
768  nxids = 0;
769  for (i = 0; i < running->xcnt + running->subxcnt; i++)
770  {
771  TransactionId xid = running->xids[i];
772 
773  /*
774  * The running-xacts snapshot can contain xids that were still visible
775  * in the procarray when the snapshot was taken, but were already
776  * WAL-logged as completed. They're not running anymore, so ignore
777  * them.
778  */
780  continue;
781 
782  xids[nxids++] = xid;
783  }
784 
785  if (nxids > 0)
786  {
787  if (procArray->numKnownAssignedXids != 0)
788  {
     /* Release the lock explicitly before raising the error */
789  LWLockRelease(ProcArrayLock);
790  elog(ERROR, "KnownAssignedXids is not empty");
791  }
792 
793  /*
794  * Sort the array so that we can add them safely into
795  * KnownAssignedXids.
796  */
797  qsort(xids, nxids, sizeof(TransactionId), xidComparator);
798 
799  /*
800  * Add the sorted snapshot into KnownAssignedXids. The running-xacts
801  * snapshot may include duplicated xids because of prepared
802  * transactions, so ignore them.
803  */
     /* After sorting, duplicates are adjacent, so comparing with xids[i - 1] suffices */
804  for (i = 0; i < nxids; i++)
805  {
806  if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
807  {
808  elog(DEBUG1,
809  "found duplicated transaction %u for KnownAssignedXids insertion",
810  xids[i]);
811  continue;
812  }
813  KnownAssignedXidsAdd(xids[i], xids[i], true);
814  }
815 
817  }
818 
819  pfree(xids);
820 
821  /*
822  * latestObservedXid is at least set to the point where SUBTRANS was
823  * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
824  * RecordKnownAssignedTransactionIds() was called for. Initialize
825  * subtrans from thereon, up to nextXid - 1.
826  *
827  * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
828  * because we've just added xids to the known assigned xids machinery that
829  * haven't gone through RecordKnownAssignedTransactionId().
830  */
834  {
837  }
838  TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
839 
840  /* ----------
841  * Now we've got the running xids we need to set the global values that
842  * are used to track snapshots as they evolve further.
843  *
844  * - latestCompletedXid which will be the xmax for snapshots
845  * - lastOverflowedXid which shows whether snapshots overflow
846  * - nextXid
847  *
848  * If the snapshot overflowed, then we still initialise with what we know,
849  * but the recovery snapshot isn't fully valid yet because we know there
850  * are some subxids missing. We don't know the specific subxids that are
851  * missing, so conservatively assume the last one is latestObservedXid.
852  * ----------
853  */
854  if (running->subxid_overflow)
855  {
857 
860  }
861  else
862  {
864 
866  }
867 
868  /*
869  * If a transaction wrote a commit record in the gap between taking and
870  * logging the snapshot then latestCompletedXid may already be higher than
871  * the value from the snapshot, so check before we use the incoming value.
872  */
874  running->latestCompletedXid))
876 
878 
879  LWLockRelease(ProcArrayLock);
880 
881  /* ShmemVariableCache->nextFullXid must be beyond any observed xid. */
883 
885 
888  elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
889  else
891  "recovery snapshot waiting for non-overflowed snapshot or "
892  "until oldest active xid on standby is at least %u (now %u)",
894  running->oldestRunningXid);
895 }
896 
897 /*
898  * ProcArrayApplyXidAssignment
899  * Process an XLOG_XACT_ASSIGNMENT WAL record
900  */
901 void
903  int nsubxids, TransactionId *subxids)
904 {
     /*
      * NOTE(review): the first declarator line (original line 902) and lines
      * 908, 920, 937 and 948 are absent from this listing.  The body uses a
      * parameter named "topxid"; the calls described by the "Mark all the
      * subtransactions as observed" and "Remove subxids" comments, and the
      * condition guarding the early return, are not visible.
      *
      * max_xid: latest xid among topxid and the subxids, as returned by
      * TransactionIdLatest().
      */
905  TransactionId max_xid;
906  int i;
907 
909 
910  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
911 
912  /*
913  * Mark all the subtransactions as observed.
914  *
915  * NOTE: This will fail if the subxid contains too many previously
916  * unobserved xids to fit into known-assigned-xids. That shouldn't happen
917  * as the code stands, because xid-assignment records should never contain
918  * more than PGPROC_MAX_CACHED_SUBXIDS entries.
919  */
921 
922  /*
923  * Notice that we update pg_subtrans with the top-level xid, rather than
924  * the parent xid. This is a difference between normal processing and
925  * recovery, yet is still correct in all cases. The reason is that
926  * subtransaction commit is not marked in clog until commit processing, so
927  * all aborted subtransactions have already been clearly marked in clog.
928  * As a result we are able to refer directly to the top-level
929  * transaction's state rather than skipping through all the intermediate
930  * states in the subtransaction tree. This should be the first time we
931  * have attempted to SubTransSetParent().
932  */
933  for (i = 0; i < nsubxids; i++)
934  SubTransSetParent(subxids[i], topxid);
935 
936  /* KnownAssignedXids isn't maintained yet, so we're done for now */
938  return;
939 
940  /*
941  * Uses same locking as transaction commit
942  */
943  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
944 
945  /*
946  * Remove subxids from known-assigned-xacts.
947  */
949 
950  /*
951  * Advance lastOverflowedXid to be at least the last of these subxids.
952  */
     /* Only ever moves lastOverflowedXid forward, never backward */
953  if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
954  procArray->lastOverflowedXid = max_xid;
955 
956  LWLockRelease(ProcArrayLock);
957 }
958 
959 /*
960  * TransactionIdIsInProgress -- is given transaction running in some backend
961  *
962  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
963  * there are four possibilities for finding a running transaction:
964  *
965  * 1. The given Xid is a main transaction Id. We will find this out cheaply
966  * by looking at the PGXACT struct for each backend.
967  *
968  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
969  * We can find this out cheaply too.
970  *
971  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
972  * if the Xid is running on the master.
973  *
974  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
975  * if that is running according to PGXACT or KnownAssignedXids. This is the
976  * slowest way, but sadly it has to be done always if the others failed,
977  * unless we see that the cached subxact sets are complete (none have
978  * overflowed).
979  *
980  * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
981  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
982  * This buys back some concurrency (and we can't retrieve the main Xids from
983  * PGXACT again anyway; see GetNewTransactionId).
984  */
985 bool
/*
 * NOTE(review): embedded listing line 986 is absent from this extract. It
 * presumably carried the function name and parameter list -- the body uses
 * `xid` without declaring it. Restore it from the upstream file.
 */
987 {
 /*
  * Shape of the visible code: three early returns that the comments below
  * describe as needing no shared memory, then a scan of the proc array under
  * ProcArrayLock (shared mode), a KnownAssignedXids check when
  * RecoveryInProgress() is true, and finally -- only if some main XIDs were
  * saved in xids[] -- a lookup through pg_xact/pg_subtrans after the lock
  * has been released.
  *
  * NOTE(review): embedded listing lines 1001, 1003, 1012, 1014, 1022, 1024,
  * 1054, 1057, 1086, 1110, 1138, 1161 and 1173 are also absent. The gaps at
  * 1001, 1012, 1022 and 1054 sit where an `if (...)` condition is expected,
  * so the braces that follow them must not be read as unconditional.
  */
988  static TransactionId *xids = NULL; /* reused across calls; allocated once below */
989  int nxids = 0; /* number of entries currently saved in xids[] */
990  ProcArrayStruct *arrayP = procArray;
991  TransactionId topxid;
992  int i,
993  j;
994 
995  /*
996  * Don't bother checking a transaction older than RecentXmin; it could not
997  * possibly still be running. (Note: in particular, this guarantees that
998  * we reject InvalidTransactionId, FrozenTransactionId, etc as not
999  * running.)
1000  */
 /* NOTE(review): the guarding condition (listing line 1001) is missing here. */
1002  {
1004  return false;
1005  }
1006 
1007  /*
1008  * We may have just checked the status of this transaction, so if it is
1009  * already known to be completed, we can fall out without any access to
1010  * shared memory.
1011  */
 /* NOTE(review): the guarding condition (listing line 1012) is missing here. */
1013  {
1015  return false;
1016  }
1017 
1018  /*
1019  * Also, we can handle our own transaction (and subtransactions) without
1020  * any access to shared memory.
1021  */
 /* NOTE(review): the guarding condition (listing line 1022) is missing here. */
1023  {
1025  return true;
1026  }
1027 
1028  /*
1029  * If first time through, get workspace to remember main XIDs in. We
1030  * malloc it permanently to avoid repeated palloc/pfree overhead.
1031  */
1032  if (xids == NULL)
1033  {
1034  /*
1035  * In hot standby mode, reserve enough space to hold all xids in the
1036  * known-assigned list. If we later finish recovery, we no longer need
1037  * the bigger array, but we don't bother to shrink it.
1038  */
1039  int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1040 
1041  xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1042  if (xids == NULL)
1043  ereport(ERROR,
1044  (errcode(ERRCODE_OUT_OF_MEMORY),
1045  errmsg("out of memory")));
1046  }
1047 
1048  LWLockAcquire(ProcArrayLock, LW_SHARED);
1049 
1050  /*
1051  * Now that we have the lock, we can check latestCompletedXid; if the
1052  * target Xid is after that, it's surely still running.
1053  */
 /* NOTE(review): the guarding condition (listing line 1054) is missing here. */
1055  {
1056  LWLockRelease(ProcArrayLock);
1058  return true;
1059  }
1060 
1061  /* No shortcuts, gotta grovel through the array */
1062  for (i = 0; i < arrayP->numProcs; i++)
1063  {
1064  int pgprocno = arrayP->pgprocnos[i];
1065  PGPROC *proc = &allProcs[pgprocno];
1066  PGXACT *pgxact = &allPgXact[pgprocno];
1067  TransactionId pxid;
1068  int pxids;
1069 
1070  /* Ignore my own proc --- dealt with it above */
1071  if (proc == MyProc)
1072  continue;
1073 
1074  /* Fetch xid just once - see GetNewTransactionId */
1075  pxid = UINT32_ACCESS_ONCE(pgxact->xid);
1076 
1077  if (!TransactionIdIsValid(pxid))
1078  continue;
1079 
1080  /*
1081  * Step 1: check the main Xid
1082  */
1083  if (TransactionIdEquals(pxid, xid))
1084  {
1085  LWLockRelease(ProcArrayLock);
1087  return true;
1088  }
1089 
1090  /*
1091  * We can ignore main Xids that are younger than the target Xid, since
1092  * the target could not possibly be their child.
1093  */
1094  if (TransactionIdPrecedes(xid, pxid))
1095  continue;
1096 
1097  /*
1098  * Step 2: check the cached child-Xids arrays
1099  */
1100  pxids = pgxact->nxids;
1101  pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
 /* Walk the cached subxid array from its last entry back to its first. */
1102  for (j = pxids - 1; j >= 0; j--)
1103  {
1104  /* Fetch xid just once - see GetNewTransactionId */
1105  TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
1106 
1107  if (TransactionIdEquals(cxid, xid))
1108  {
1109  LWLockRelease(ProcArrayLock);
1111  return true;
1112  }
1113  }
1114 
1115  /*
1116  * Save the main Xid for step 4. We only need to remember main Xids
1117  * that have uncached children. (Note: there is no race condition
1118  * here because the overflowed flag cannot be cleared, only set, while
1119  * we hold ProcArrayLock. So we can't miss an Xid that we need to
1120  * worry about.)
1121  */
1122  if (pgxact->overflowed)
1123  xids[nxids++] = pxid;
1124  }
1125 
1126  /*
1127  * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1128  * in the list must be treated as running.
1129  */
1130  if (RecoveryInProgress())
1131  {
1132  /* none of the PGXACT entries should have XIDs in hot standby mode */
1133  Assert(nxids == 0);
1134 
1135  if (KnownAssignedXidExists(xid))
1136  {
1137  LWLockRelease(ProcArrayLock);
1139  return true;
1140  }
1141 
1142  /*
1143  * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1144  * too. Fetch all xids from KnownAssignedXids that are lower than
1145  * xid, since if xid is a subtransaction its parent will always have a
1146  * lower value. Note we will collect both main and subXIDs here, but
1147  * there's no help for it.
1148  */
1149  if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1150  nxids = KnownAssignedXidsGet(xids, xid);
1151  }
1152 
1153  LWLockRelease(ProcArrayLock);
1154 
1155  /*
1156  * If none of the relevant caches overflowed, we know the Xid is not
1157  * running without even looking at pg_subtrans.
1158  */
1159  if (nxids == 0)
1160  {
1162  return false;
1163  }
1164 
1165  /*
1166  * Step 4: have to check pg_subtrans.
1167  *
1168  * At this point, we know it's either a subtransaction of one of the Xids
1169  * in xids[], or it's not running. If it's an already-failed
1170  * subtransaction, we want to say "not running" even though its parent may
1171  * still be running. So first, check pg_xact to see if it's been aborted.
1172  */
1174 
1175  if (TransactionIdDidAbort(xid))
1176  return false;
1177 
1178  /*
1179  * It isn't aborted, so check whether the transaction tree it belongs to
1180  * is still running (or, more precisely, whether it was running when we
1181  * held ProcArrayLock).
1182  */
1183  topxid = SubTransGetTopmostTransaction(xid);
1184  Assert(TransactionIdIsValid(topxid));
 /* Search the saved main XIDs only when xid has a distinct top-level parent. */
1185  if (!TransactionIdEquals(topxid, xid))
1186  {
1187  for (i = 0; i < nxids; i++)
1188  {
1189  if (TransactionIdEquals(xids[i], topxid))
1190  return true;
1191  }
1192  }
1193 
1194  return false;
1195 }
1196 
1197 /*
1198  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1199  *
1200  * This differs from TransactionIdIsInProgress in that it ignores prepared
1201  * transactions, as well as transactions running on the master if we're in
1202  * hot standby. Also, we ignore subtransactions since that's not needed
1203  * for current uses.
1204  */
1205 bool
/*
 * NOTE(review): embedded listing line 1206 is absent from this extract. It
 * presumably carried the function name and parameter list -- the body uses
 * `xid` without declaring it. Restore it from the upstream file.
 */
1207 {
 /*
  * Visible behaviour: scan the proc array under ProcArrayLock (shared mode)
  * and report true as soon as an entry with a nonzero pid advertises a main
  * xid equal to the target; subxids are not examined.
  */
1208  bool result = false;
1209  ProcArrayStruct *arrayP = procArray;
1210  int i;
1211 
1212  /*
1213  * Don't bother checking a transaction older than RecentXmin; it could not
1214  * possibly still be running.
1215  */
 /*
  * NOTE(review): the guarding condition (listing line 1216) is missing here;
  * as shown, the return below would be unconditional.
  */
1217  return false;
1218 
1219  LWLockAcquire(ProcArrayLock, LW_SHARED);
1220 
1221  for (i = 0; i < arrayP->numProcs; i++)
1222  {
1223  int pgprocno = arrayP->pgprocnos[i];
1224  PGPROC *proc = &allProcs[pgprocno];
1225  PGXACT *pgxact = &allPgXact[pgprocno];
1226  TransactionId pxid;
1227 
1228  /* Fetch xid just once - see GetNewTransactionId */
1229  pxid = UINT32_ACCESS_ONCE(pgxact->xid);
1230 
1231  if (!TransactionIdIsValid(pxid))
1232  continue;
1233 
1234  if (proc->pid == 0)
1235  continue; /* ignore prepared transactions */
1236 
1237  if (TransactionIdEquals(pxid, xid))
1238  {
1239  result = true;
1240  break;
1241  }
1242  }
1243 
 /* Single release point: the loop exits via break, never via return. */
1244  LWLockRelease(ProcArrayLock);
1245 
1246  return result;
1247 }
1248 
1249 
1250 /*
1251  * GetOldestXmin -- returns oldest transaction that was running
1252  * when any current transaction was started.
1253  *
1254  * If rel is NULL or a shared relation, all backends are considered, otherwise
1255  * only backends running in this database are considered.
1256  *
1257  * The flags are used to ignore the backends in calculation when any of the
1258  * corresponding flags is set. Typically, if you want to ignore ones with
1259  * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM.
1260  *
1261  * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and
1262  * catalog_xmin of any replication slots that exist in the system when
1263  * calculating the oldest xmin.
1264  *
1265  * This is used by VACUUM to decide which deleted tuples must be preserved in
1266  * the passed in table. For shared relations backends in all databases must be
1267  * considered, but for non-shared relations that's not required, since only
1268  * backends in my own database could ever see the tuples in them. Also, we can
1269  * ignore concurrently running lazy VACUUMs because (a) they must be working
1270  * on other tables, and (b) they don't need to do snapshot-based lookups.
1271  *
1272  * This is also used to determine where to truncate pg_subtrans. For that,
1273  * backends in all databases have to be considered, so rel = NULL has to be
1274  * passed in.
1275  *
1276  * Note: we include all currently running xids in the set of considered xids.
1277  * This ensures that if a just-started xact has not yet set its snapshot,
1278  * when it does set the snapshot it cannot set xmin less than what we compute.
1279  * See notes in src/backend/access/transam/README.
1280  *
1281  * Note: despite the above, it's possible for the calculated value to move
1282  * backwards on repeated calls. The calculated value is conservative, so that
1283  * anything older is definitely not considered as running by anyone anymore,
1284  * but the exact value calculated depends on a number of things. For example,
1285  * if rel = NULL and there are no transactions running in the current
1286  * database, GetOldestXmin() returns latestCompletedXid. If a transaction
1287  * begins after that, its xmin will include in-progress transactions in other
1288  * databases that started earlier, so another call will return a lower value.
1289  * Nonetheless it is safe to vacuum a table in the current database with the
1290  * first result. There are also replication-related effects: a walsender
1291  * process can set its xmin based on transactions that are no longer running
1292  * in the master but are still being replayed on the standby, thus possibly
1293  * making the GetOldestXmin reading go backwards. In this case there is a
1294  * possibility that we lose data that the standby would like to have, but
1295  * unless the standby uses a replication slot to make its xmin persistent
1296  * there is little we can do about that --- data is only protected if the
1297  * walsender runs continuously while queries are executed on the standby.
1298  * (The Hot Standby code deals with such cases by failing standby queries
1299  * that needed to access already-removed data, so there's no integrity bug.)
1300  * The return value is also adjusted with vacuum_defer_cleanup_age, so
1301  * increasing that setting on the fly is another easy way to make
1302  * GetOldestXmin() move backwards, with no consequences for data integrity.
1303  */
1305 GetOldestXmin(Relation rel, int flags)
1306 {
 /*
  * NOTE(review): this extract is missing several embedded listing lines:
  * 1304 (the return type, just above the declarator), 1312-1313, 1333, 1386,
  * 1437 and 1440. As a result replication_slot_xmin,
  * replication_slot_catalog_xmin and kaxmin are used below without a visible
  * declaration, `result` has no visible initial assignment, and the final
  * `if` has lost both part of its condition and its body. Restore these
  * from the upstream file before relying on this text.
  */
1307  ProcArrayStruct *arrayP = procArray;
1308  TransactionId result;
1309  int index;
1310  bool allDbs;
1311 
1314 
1315  /*
1316  * If we're not computing a relation specific limit, or if a shared
1317  * relation has been passed in, backends in all databases have to be
1318  * considered.
1319  */
1320  allDbs = rel == NULL || rel->rd_rel->relisshared;
1321 
1322  /* Cannot look for individual databases during recovery */
1323  Assert(allDbs || !RecoveryInProgress());
1324 
1325  LWLockAcquire(ProcArrayLock, LW_SHARED);
1326 
1327  /*
1328  * We initialize the MIN() calculation with latestCompletedXid + 1. This
1329  * is a lower bound for the XIDs that might appear in the ProcArray later,
1330  * and so protects us against overestimating the result due to future
1331  * additions.
1332  */
 /* NOTE(review): the assignment to `result` (listing line 1333) is missing here. */
1334  Assert(TransactionIdIsNormal(result));
1335  TransactionIdAdvance(result);
1336 
1337  for (index = 0; index < arrayP->numProcs; index++)
1338  {
1339  int pgprocno = arrayP->pgprocnos[index];
1340  PGPROC *proc = &allProcs[pgprocno];
1341  PGXACT *pgxact = &allPgXact[pgprocno];
1342 
 /* Skip procs whose vacuumFlags intersect the caller's masked flags. */
1343  if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK))
1344  continue;
1345 
1346  if (allDbs ||
1347  proc->databaseId == MyDatabaseId ||
1348  proc->databaseId == 0) /* always include WalSender */
1349  {
1350  /* Fetch xid just once - see GetNewTransactionId */
1351  TransactionId xid = UINT32_ACCESS_ONCE(pgxact->xid);
1352 
1353  /* First consider the transaction's own Xid, if any */
1354  if (TransactionIdIsNormal(xid) &&
1355  TransactionIdPrecedes(xid, result))
1356  result = xid;
1357 
1358  /*
1359  * Also consider the transaction's Xmin, if set.
1360  *
1361  * We must check both Xid and Xmin because a transaction might
1362  * have an Xmin but not (yet) an Xid; conversely, if it has an
1363  * Xid, that could determine some not-yet-set Xmin.
1364  */
1365  xid = UINT32_ACCESS_ONCE(pgxact->xmin);
1366  if (TransactionIdIsNormal(xid) &&
1367  TransactionIdPrecedes(xid, result))
1368  result = xid;
1369  }
1370  }
1371 
1372  /*
1373  * Fetch into local variable while ProcArrayLock is held - the
1374  * LWLockRelease below is a barrier, ensuring this happens inside the
1375  * lock.
1376  */
1377  replication_slot_xmin = procArray->replication_slot_xmin;
1378  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1379 
1380  if (RecoveryInProgress())
1381  {
1382  /*
1383  * Check to see whether KnownAssignedXids contains an xid value older
1384  * than the main procarray.
1385  */
 /* NOTE(review): the statement that sets kaxmin (listing line 1386) is missing here. */
1387 
1388  LWLockRelease(ProcArrayLock);
1389 
1390  if (TransactionIdIsNormal(kaxmin) &&
1391  TransactionIdPrecedes(kaxmin, result))
1392  result = kaxmin;
1393  }
1394  else
1395  {
1396  /*
1397  * No other information needed, so release the lock immediately.
1398  */
1399  LWLockRelease(ProcArrayLock);
1400 
1401  /*
1402  * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
1403  * being careful not to generate a "permanent" XID.
1404  *
1405  * vacuum_defer_cleanup_age provides some additional "slop" for the
1406  * benefit of hot standby queries on standby servers. This is quick
1407  * and dirty, and perhaps not all that useful unless the master has a
1408  * predictable transaction rate, but it offers some protection when
1409  * there's no walsender connection. Note that we are assuming
1410  * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1411  * so guc.c should limit it to no more than the xidStopLimit threshold
1412  * in varsup.c. Also note that we intentionally don't apply
1413  * vacuum_defer_cleanup_age on standby servers.
1414  */
1415  result -= vacuum_defer_cleanup_age;
1416  if (!TransactionIdIsNormal(result))
1417  result = FirstNormalTransactionId;
1418  }
1419 
1420  /*
1421  * Check whether there are replication slots requiring an older xmin.
1422  */
1423  if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1424  TransactionIdIsValid(replication_slot_xmin) &&
1425  NormalTransactionIdPrecedes(replication_slot_xmin, result))
1426  result = replication_slot_xmin;
1427 
1428  /*
1429  * After locks have been released and vacuum_defer_cleanup_age has been
1430  * applied, check whether we need to back up further to make logical
1431  * decoding possible. We need to do so if we're computing the global limit
1432  * (rel = NULL) or if the passed relation is a catalog relation of some
1433  * kind.
1434  */
1435  if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1436  (rel == NULL ||
 /* NOTE(review): the rest of this parenthesised test (listing line 1437) is missing. */
1438  TransactionIdIsValid(replication_slot_catalog_xmin) &&
1439  NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
 /*
  * NOTE(review): the body of the `if` above (listing line 1440) is missing;
  * the return below is presumably NOT meant to be governed by it.
  */
1441 
1442  return result;
1443 }
1444 
1445 /*
1446  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1447  *
1448  * We have to export this for use by snapmgr.c.
1449  */
1450 int
/*
 * NOTE(review): embedded listing line 1451 (presumably the function name and
 * parameter list) is absent from this extract.
 */
1452 {
 /* The limit is simply the proc array's capacity, maxProcs. */
1453  return procArray->maxProcs;
1454 }
1455 
1456 /*
1457  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1458  *
1459  * We have to export this for use by snapmgr.c.
1460  */
1461 int
/*
 * NOTE(review): embedded listing line 1462 (presumably the function name and
 * parameter list) is absent from this extract.
 */
1463 {
 /* The limit is the compile-time constant TOTAL_MAX_CACHED_SUBXIDS. */
1464  return TOTAL_MAX_CACHED_SUBXIDS;
1465 }
1466 
1467 /*
1468  * GetSnapshotData -- returns information about running transactions.
1469  *
1470  * The returned snapshot includes xmin (lowest still-running xact ID),
1471  * xmax (highest completed xact ID + 1), and a list of running xact IDs
1472  * in the range xmin <= xid < xmax. It is used as follows:
1473  * All xact IDs < xmin are considered finished.
1474  * All xact IDs >= xmax are considered still running.
1475  * For an xact ID xmin <= xid < xmax, consult list to see whether
1476  * it is considered running or not.
1477  * This ensures that the set of transactions seen as "running" by the
1478  * current xact will not change after it takes the snapshot.
1479  *
1480  * All running top-level XIDs are included in the snapshot, except for lazy
1481  * VACUUM processes. We also try to include running subtransaction XIDs,
1482  * but since PGPROC has only a limited cache area for subxact XIDs, full
1483  * information may not be available. If we find any overflowed subxid arrays,
1484  * we have to mark the snapshot's subxid data as overflowed, and extra work
1485  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
1486  * in heapam_visibility.c).
1487  *
1488  * We also update the following backend-global variables:
1489  * TransactionXmin: the oldest xmin of any snapshot in use in the
1490  * current transaction (this is the same as MyPgXact->xmin).
1491  * RecentXmin: the xmin computed for the most recent snapshot. XIDs
1492  * older than this are known not running any more.
1493  * RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
1494  * running transactions, except those running LAZY VACUUM). This is
1495  * the same computation done by
1496  * GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM).
1497  * RecentGlobalDataXmin: the global xmin for non-catalog tables
1498  * >= RecentGlobalXmin
1499  *
1500  * Note: this function should probably not be called with an argument that's
1501  * not statically allocated (see xip allocation below).
1502  */
1503 Snapshot
/*
 * NOTE(review): embedded listing line 1504 is absent from this extract. It
 * presumably carried the function name and parameter list -- the body uses
 * `snapshot` without declaring it. Restore it from the upstream file.
 */
1505 {
 /*
  * NOTE(review): further embedded listing lines are missing below: 1514-1515,
  * 1537, 1544, 1558-1559, 1565, 1589, 1708, 1722-1724, 1729, 1732 and 1740.
  * Visible consequences: replication_slot_xmin and
  * replication_slot_catalog_xmin are used without a declaration, both
  * allocation calls are cut off after the cast, xmax has no visible initial
  * assignment, and several `if` statements near the end have lost their
  * condition or their body. Each gap is marked where it occurs.
  */
1506  ProcArrayStruct *arrayP = procArray;
1507  TransactionId xmin;
1508  TransactionId xmax;
1509  TransactionId globalxmin;
1510  int index;
1511  int count = 0; /* entries written to snapshot->xip */
1512  int subcount = 0; /* entries written to snapshot->subxip */
1513  bool suboverflowed = false;
1516 
1517  Assert(snapshot != NULL);
1518 
1519  /*
1520  * Allocating space for maxProcs xids is usually overkill; numProcs would
1521  * be sufficient. But it seems better to do the malloc while not holding
1522  * the lock, so we can't look at numProcs. Likewise, we allocate much
1523  * more subxip storage than is probably needed.
1524  *
1525  * This does open a possibility for avoiding repeated malloc/free: since
1526  * maxProcs does not change at runtime, we can simply reuse the previous
1527  * xip arrays if any. (This relies on the fact that all callers pass
1528  * static SnapshotData structs.)
1529  */
1530  if (snapshot->xip == NULL)
1531  {
1532  /*
1533  * First call for this snapshot. Snapshot is same size whether or not
1534  * we are in recovery, see later comments.
1535  */
1536  snapshot->xip = (TransactionId *)
 /* NOTE(review): the allocation call (listing line 1537) is missing here. */
1538  if (snapshot->xip == NULL)
1539  ereport(ERROR,
1540  (errcode(ERRCODE_OUT_OF_MEMORY),
1541  errmsg("out of memory")));
1542  Assert(snapshot->subxip == NULL);
1543  snapshot->subxip = (TransactionId *)
 /* NOTE(review): the allocation call (listing line 1544) is missing here. */
1545  if (snapshot->subxip == NULL)
1546  ereport(ERROR,
1547  (errcode(ERRCODE_OUT_OF_MEMORY),
1548  errmsg("out of memory")));
1549  }
1550 
1551  /*
1552  * It is sufficient to get shared lock on ProcArrayLock, even if we are
1553  * going to set MyPgXact->xmin.
1554  */
1555  LWLockAcquire(ProcArrayLock, LW_SHARED);
1556 
1557  /* xmax is always latestCompletedXid + 1 */
 /* NOTE(review): the statements on listing lines 1558-1559 are missing here. */
1560  TransactionIdAdvance(xmax);
1561 
1562  /* initialize xmin calculation with xmax */
1563  globalxmin = xmin = xmax;
1564 
 /* NOTE(review): the statement on listing line 1565 is missing here. */
1566 
1567  if (!snapshot->takenDuringRecovery)
1568  {
1569  int *pgprocnos = arrayP->pgprocnos;
1570  int numProcs;
1571 
1572  /*
1573  * Spin over procArray checking xid, xmin, and subxids. The goal is
1574  * to gather all active xids, find the lowest xmin, and try to record
1575  * subxids.
1576  */
1577  numProcs = arrayP->numProcs;
1578  for (index = 0; index < numProcs; index++)
1579  {
1580  int pgprocno = pgprocnos[index];
1581  PGXACT *pgxact = &allPgXact[pgprocno];
1582  TransactionId xid;
1583 
1584  /*
1585  * Skip over backends doing logical decoding which manages xmin
1586  * separately (check below) and ones running LAZY VACUUM.
1587  */
1588  if (pgxact->vacuumFlags &
 /* NOTE(review): the flag mask (listing line 1589) is missing here. */
1590  continue;
1591 
1592  /* Update globalxmin to be the smallest valid xmin */
1593  xid = UINT32_ACCESS_ONCE(pgxact->xmin);
1594  if (TransactionIdIsNormal(xid) &&
1595  NormalTransactionIdPrecedes(xid, globalxmin))
1596  globalxmin = xid;
1597 
1598  /* Fetch xid just once - see GetNewTransactionId */
1599  xid = UINT32_ACCESS_ONCE(pgxact->xid);
1600 
1601  /*
1602  * If the transaction has no XID assigned, we can skip it; it
1603  * won't have sub-XIDs either. If the XID is >= xmax, we can also
1604  * skip it; such transactions will be treated as running anyway
1605  * (and any sub-XIDs will also be >= xmax).
1606  */
1607  if (!TransactionIdIsNormal(xid)
1608  || !NormalTransactionIdPrecedes(xid, xmax))
1609  continue;
1610 
1611  /*
1612  * We don't include our own XIDs (if any) in the snapshot, but we
1613  * must include them in xmin.
1614  */
1615  if (NormalTransactionIdPrecedes(xid, xmin))
1616  xmin = xid;
1617  if (pgxact == MyPgXact)
1618  continue;
1619 
1620  /* Add XID to snapshot. */
1621  snapshot->xip[count++] = xid;
1622 
1623  /*
1624  * Save subtransaction XIDs if possible (if we've already
1625  * overflowed, there's no point). Note that the subxact XIDs must
1626  * be later than their parent, so no need to check them against
1627  * xmin. We could filter against xmax, but it seems better not to
1628  * do that much work while holding the ProcArrayLock.
1629  *
1630  * The other backend can add more subxids concurrently, but cannot
1631  * remove any. Hence it's important to fetch nxids just once.
1632  * Should be safe to use memcpy, though. (We needn't worry about
1633  * missing any xids added concurrently, because they must postdate
1634  * xmax.)
1635  *
1636  * Again, our own XIDs are not included in the snapshot.
1637  */
1638  if (!suboverflowed)
1639  {
1640  if (pgxact->overflowed)
1641  suboverflowed = true;
1642  else
1643  {
1644  int nxids = pgxact->nxids;
1645 
1646  if (nxids > 0)
1647  {
1648  PGPROC *proc = &allProcs[pgprocno];
1649 
1650  pg_read_barrier(); /* pairs with GetNewTransactionId */
1651 
1652  memcpy(snapshot->subxip + subcount,
1653  (void *) proc->subxids.xids,
1654  nxids * sizeof(TransactionId));
1655  subcount += nxids;
1656  }
1657  }
1658  }
1659  }
1660  }
1661  else
1662  {
1663  /*
1664  * We're in hot standby, so get XIDs from KnownAssignedXids.
1665  *
1666  * We store all xids directly into subxip[]. Here's why:
1667  *
1668  * In recovery we don't know which xids are top-level and which are
1669  * subxacts, a design choice that greatly simplifies xid processing.
1670  *
1671  * It seems like we would want to try to put xids into xip[] only, but
1672  * that is fairly small. We would either need to make that bigger or
1673  * to increase the rate at which we WAL-log xid assignment; neither is
1674  * an appealing choice.
1675  *
1676  * We could try to store xids into xip[] first and then into subxip[]
1677  * if there are too many xids. That only works if the snapshot doesn't
1678  * overflow because we do not search subxip[] in that case. A simpler
1679  * way is to just store all xids in the subxact array because this is
1680  * by far the bigger array. We just leave the xip array empty.
1681  *
1682  * Either way we need to change the way XidInMVCCSnapshot() works
1683  * depending upon when the snapshot was taken, or change normal
1684  * snapshot processing so it matches.
1685  *
1686  * Note: It is possible for recovery to end before we finish taking
1687  * the snapshot, and for newly assigned transaction ids to be added to
1688  * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
1689  * those newly added transaction ids would be filtered away, so we
1690  * need not be concerned about them.
1691  */
1692  subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
1693  xmax);
1694 
1695  if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
1696  suboverflowed = true;
1697  }
1698 
1699 
1700  /*
1701  * Fetch into local variable while ProcArrayLock is held - the
1702  * LWLockRelease below is a barrier, ensuring this happens inside the
1703  * lock.
1704  */
1705  replication_slot_xmin = procArray->replication_slot_xmin;
1706  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1707 
 /*
  * NOTE(review): listing line 1708 is missing here; it may be a condition
  * guarding the assignment below -- confirm against the upstream file.
  */
1709  MyPgXact->xmin = TransactionXmin = xmin;
1710 
1711  LWLockRelease(ProcArrayLock);
1712 
1713  /*
1714  * Update globalxmin to include actual process xids. This is a slightly
1715  * different way of computing it than GetOldestXmin uses, but should give
1716  * the same result.
1717  */
1718  if (TransactionIdPrecedes(xmin, globalxmin))
1719  globalxmin = xmin;
1720 
1721  /* Update global variables too */
 /* NOTE(review): the statements on listing lines 1722-1724 are missing here. */
1725 
1726  /* Check whether there's a replication slot requiring an older xmin. */
1727  if (TransactionIdIsValid(replication_slot_xmin) &&
1728  NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
 /* NOTE(review): the body of the `if` above (listing line 1729) is missing. */
1730 
1731  /* Non-catalog tables can be vacuumed if older than this xid */
 /* NOTE(review): the statement on listing line 1732 is missing here. */
1733 
1734  /*
1735  * Check whether there's a replication slot requiring an older catalog
1736  * xmin.
1737  */
1738  if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
1739  NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
 /*
  * NOTE(review): the body of the `if` above (listing line 1740) is missing;
  * the assignment to RecentXmin below is presumably NOT meant to be
  * governed by it.
  */
1741 
1742  RecentXmin = xmin;
1743 
1744  snapshot->xmin = xmin;
1745  snapshot->xmax = xmax;
1746  snapshot->xcnt = count;
1747  snapshot->subxcnt = subcount;
1748  snapshot->suboverflowed = suboverflowed;
1749 
1750  snapshot->curcid = GetCurrentCommandId(false);
1751 
1752  /*
1753  * This is a new snapshot, so set both refcounts to zero, and mark it as
1754  * not copied in persistent memory.
1755  */
1756  snapshot->active_count = 0;
1757  snapshot->regd_count = 0;
1758  snapshot->copied = false;
1759 
1760  if (old_snapshot_threshold < 0)
1761  {
1762  /*
1763  * If not using "snapshot too old" feature, fill related fields with
1764  * dummy values that don't require any locking.
1765  */
1766  snapshot->lsn = InvalidXLogRecPtr;
1767  snapshot->whenTaken = 0;
1768  }
1769  else
1770  {
1771  /*
1772  * Capture the current time and WAL stream location in case this
1773  * snapshot becomes old enough to need to fall back on the special
1774  * "old snapshot" logic.
1775  */
1776  snapshot->lsn = GetXLogInsertRecPtr();
1777  snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1778  MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
1779  }
1780 
 /* The caller's own struct is filled in place and handed back. */
1781  return snapshot;
1782 }
1783 
1784 /*
1785  * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
1786  *
1787  * This is called when installing a snapshot imported from another
1788  * transaction. To ensure that OldestXmin doesn't go backwards, we must
1789  * check that the source transaction is still running, and we'd better do
1790  * that atomically with installing the new xmin.
1791  *
1792  * Returns true if successful, false if source xact is no longer running.
1793  */
1794 bool
/*
 * NOTE(review): embedded listing line 1795 is absent from this extract. It
 * presumably carried the function name and the first parameter -- the body
 * uses `xmin` without declaring it. Restore it from the upstream file.
 */
1796  VirtualTransactionId *sourcevxid)
1797 {
 /*
  * Visible behaviour: under ProcArrayLock (shared mode), look for the proc
  * whose backendId and lxid match *sourcevxid, is in MyDatabaseId, is not
  * flagged PROC_IN_VACUUM, and advertises a normal xmin <= the requested
  * xmin. On a match, install xmin into MyPgXact->xmin and TransactionXmin
  * and return true; otherwise return false.
  */
1798  bool result = false;
1799  ProcArrayStruct *arrayP = procArray;
1800  int index;
1801 
 /* NOTE(review): the statement on listing line 1802 is missing here. */
1803  if (!sourcevxid)
1804  return false;
1805 
1806  /* Get lock so source xact can't end while we're doing this */
1807  LWLockAcquire(ProcArrayLock, LW_SHARED);
1808 
1809  for (index = 0; index < arrayP->numProcs; index++)
1810  {
1811  int pgprocno = arrayP->pgprocnos[index];
1812  PGPROC *proc = &allProcs[pgprocno];
1813  PGXACT *pgxact = &allPgXact[pgprocno];
1814  TransactionId xid;
1815 
1816  /* Ignore procs running LAZY VACUUM */
1817  if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1818  continue;
1819 
1820  /* We are only interested in the specific virtual transaction. */
1821  if (proc->backendId != sourcevxid->backendId)
1822  continue;
1823  if (proc->lxid != sourcevxid->localTransactionId)
1824  continue;
1825 
1826  /*
1827  * We check the transaction's database ID for paranoia's sake: if it's
1828  * in another DB then its xmin does not cover us. Caller should have
1829  * detected this already, so we just treat any funny cases as
1830  * "transaction not found".
1831  */
1832  if (proc->databaseId != MyDatabaseId)
1833  continue;
1834 
1835  /*
1836  * Likewise, let's just make real sure its xmin does cover us.
1837  */
1838  xid = UINT32_ACCESS_ONCE(pgxact->xmin);
1839  if (!TransactionIdIsNormal(xid) ||
1840  !TransactionIdPrecedesOrEquals(xid, xmin))
1841  continue;
1842 
1843  /*
1844  * We're good. Install the new xmin. As in GetSnapshotData, set
1845  * TransactionXmin too. (Note that because snapmgr.c called
1846  * GetSnapshotData first, we'll be overwriting a valid xmin here, so
1847  * we don't check that.)
1848  */
1849  MyPgXact->xmin = TransactionXmin = xmin;
1850 
1851  result = true;
1852  break;
1853  }
1854 
1855  LWLockRelease(ProcArrayLock);
1856 
1857  return result;
1858 }
1859 
1860 /*
1861  * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin
1862  *
1863  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
1864  * PGPROC of the transaction from which we imported the snapshot, rather than
1865  * an XID.
1866  *
1867  * Returns true if successful, false if source xact is no longer running.
1868  */
1869 bool
/*
 * NOTE(review): embedded listing line 1870 is absent from this extract. It
 * presumably carried the function name and parameter list -- the body uses
 * `xmin` and `proc` without declaring them. Restore it from the upstream
 * file.
 */
1871 {
 /*
  * Visible behaviour: under ProcArrayLock (shared mode), install xmin into
  * MyPgXact->xmin and TransactionXmin only if the given proc is in
  * MyDatabaseId and advertises a normal xmin <= the requested xmin; the
  * return value says whether the install happened.
  */
1872  bool result = false;
1873  TransactionId xid;
1874  PGXACT *pgxact;
1875 
 /* NOTE(review): the statement on listing line 1876 is missing here. */
1877  Assert(proc != NULL);
1878 
1879  /* Get lock so source xact can't end while we're doing this */
1880  LWLockAcquire(ProcArrayLock, LW_SHARED);
1881 
 /* Locate the PGXACT entry that parallels the given PGPROC. */
1882  pgxact = &allPgXact[proc->pgprocno];
1883 
1884  /*
1885  * Be certain that the referenced PGPROC has an advertised xmin which is
1886  * no later than the one we're installing, so that the system-wide xmin
1887  * can't go backwards. Also, make sure it's running in the same database,
1888  * so that the per-database xmin cannot go backwards.
1889  */
1890  xid = UINT32_ACCESS_ONCE(pgxact->xmin);
1891  if (proc->databaseId == MyDatabaseId &&
1892  TransactionIdIsNormal(xid) &&
1893  TransactionIdPrecedesOrEquals(xid, xmin))
1894  {
1895  MyPgXact->xmin = TransactionXmin = xmin;
1896  result = true;
1897  }
1898 
1899  LWLockRelease(ProcArrayLock);
1900 
1901  return result;
1902 }
1903 
1904 /*
1905  * GetRunningTransactionData -- returns information about running transactions.
1906  *
1907  * Similar to GetSnapshotData but returns more information. We include
1908  * all PGXACTs with an assigned TransactionId, even VACUUM processes and
1909  * prepared transactions.
1910  *
1911  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
1912  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
1913  * array until the caller has WAL-logged this snapshot, and releases the
1914  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
1915  * lock is released.
1916  *
1917  * The returned data structure is statically allocated; caller should not
1918  * modify it, and must not assume it is valid past the next call.
1919  *
1920  * This is never executed during recovery so there is no need to look at
1921  * KnownAssignedXids.
1922  *
1923  * Dummy PGXACTs from prepared transactions are included, meaning that this
1924  * may return entries with duplicated TransactionId values coming from a
1925  * transaction that is finishing its prepare. Nothing is done about duplicated
1926  * entries here, so as not to hold ProcArrayLock longer than necessary.
1927  *
1928  * We don't worry about updating other counters, we want to keep this as
1929  * simple as possible and leave GetSnapshotData() as the primary code for
1930  * that bookkeeping.
1931  *
1932  * Note that if any transaction has overflowed its cached subtransactions
1933  * then there is no real need to include any subtransactions.
1934  */
1937 {
1938  /* result workspace */
1939  static RunningTransactionsData CurrentRunningXactsData;
1940 
1941  ProcArrayStruct *arrayP = procArray;
1942  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
1943  TransactionId latestCompletedXid;
1944  TransactionId oldestRunningXid;
1945  TransactionId *xids;
1946  int index;
1947  int count;
1948  int subcount;
1949  bool suboverflowed;
1950 
1952 
1953  /*
1954  * Allocating space for maxProcs xids is usually overkill; numProcs would
1955  * be sufficient. But it seems better to do the malloc while not holding
1956  * the lock, so we can't look at numProcs. Likewise, we allocate much
1957  * more subxip storage than is probably needed.
1958  *
1959  * Should only be allocated in bgwriter, since only ever executed during
1960  * checkpoints.
1961  */
1962  if (CurrentRunningXacts->xids == NULL)
1963  {
1964  /*
1965  * First call
1966  */
1967  CurrentRunningXacts->xids = (TransactionId *)
1969  if (CurrentRunningXacts->xids == NULL)
1970  ereport(ERROR,
1971  (errcode(ERRCODE_OUT_OF_MEMORY),
1972  errmsg("out of memory")));
1973  }
1974 
1975  xids = CurrentRunningXacts->xids;
1976 
1977  count = subcount = 0;
1978  suboverflowed = false;
1979 
1980  /*
1981  * Ensure that no xids enter or leave the procarray while we obtain
1982  * snapshot.
1983  */
1984  LWLockAcquire(ProcArrayLock, LW_SHARED);
1985  LWLockAcquire(XidGenLock, LW_SHARED);
1986 
1987  latestCompletedXid = ShmemVariableCache->latestCompletedXid;
1988 
1990 
1991  /*
1992  * Spin over procArray collecting all xids
1993  */
1994  for (index = 0; index < arrayP->numProcs; index++)
1995  {
1996  int pgprocno = arrayP->pgprocnos[index];
1997  PGXACT *pgxact = &allPgXact[pgprocno];
1998  TransactionId xid;
1999 
2000  /* Fetch xid just once - see GetNewTransactionId */
2001  xid = UINT32_ACCESS_ONCE(pgxact->xid);
2002 
2003  /*
2004  * We don't need to store transactions that don't have a TransactionId
2005  * yet because they will not show as running on a standby server.
2006  */
2007  if (!TransactionIdIsValid(xid))
2008  continue;
2009 
2010  /*
2011  * Be careful not to exclude any xids before calculating the values of
2012  * oldestRunningXid and suboverflowed, since these are used to clean
2013  * up transaction information held on standbys.
2014  */
2015  if (TransactionIdPrecedes(xid, oldestRunningXid))
2016  oldestRunningXid = xid;
2017 
2018  if (pgxact->overflowed)
2019  suboverflowed = true;
2020 
2021  /*
2022  * If we wished to exclude xids this would be the right place for it.
2023  * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2024  * but they do during truncation at the end when they get the lock and
2025  * truncate, so it is not much of a problem to include them if they
2026  * are seen and it is cleaner to include them.
2027  */
2028 
2029  xids[count++] = xid;
2030  }
2031 
2032  /*
2033  * Spin over procArray collecting all subxids, but only if there hasn't
2034  * been a suboverflow.
2035  */
2036  if (!suboverflowed)
2037  {
2038  for (index = 0; index < arrayP->numProcs; index++)
2039  {
2040  int pgprocno = arrayP->pgprocnos[index];
2041  PGPROC *proc = &allProcs[pgprocno];
2042  PGXACT *pgxact = &allPgXact[pgprocno];
2043  int nxids;
2044 
2045  /*
2046  * Save subtransaction XIDs. Other backends can't add or remove
2047  * entries while we're holding XidGenLock.
2048  */
2049  nxids = pgxact->nxids;
2050  if (nxids > 0)
2051  {
2052  /* barrier not really required, as XidGenLock is held, but ... */
2053  pg_read_barrier(); /* pairs with GetNewTransactionId */
2054 
2055  memcpy(&xids[count], (void *) proc->subxids.xids,
2056  nxids * sizeof(TransactionId));
2057  count += nxids;
2058  subcount += nxids;
2059 
2060  /*
2061  * Top-level XID of a transaction is always less than any of
2062  * its subxids, so we don't need to check if any of the
2063  * subxids are smaller than oldestRunningXid
2064  */
2065  }
2066  }
2067  }
2068 
2069  /*
2070  * It's important *not* to include the limits set by slots here because
2071  * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2072  * were to be included here the initial value could never increase because
2073  * of a circular dependency where slots only increase their limits when
2074  * running xacts increases oldestRunningXid and running xacts only
2075  * increases if slots do.
2076  */
2077 
2078  CurrentRunningXacts->xcnt = count - subcount;
2079  CurrentRunningXacts->subxcnt = subcount;
2080  CurrentRunningXacts->subxid_overflow = suboverflowed;
2082  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2083  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2084 
2085  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2086  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2087  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2088 
2089  /* We don't release the locks here, the caller is responsible for that */
2090 
2091  return CurrentRunningXacts;
2092 }
2093 
2094 /*
2095  * GetOldestActiveTransactionId()
2096  *
2097  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2098  * all PGXACTs with an assigned TransactionId, even VACUUM processes.
2099  * We look at all databases, though there is no need to include WALSender
2100  * since this has no effect on hot standby conflicts.
2101  *
2102  * This is never executed during recovery so there is no need to look at
2103  * KnownAssignedXids.
2104  *
2105  * We don't worry about updating other counters, we want to keep this as
2106  * simple as possible and leave GetSnapshotData() as the primary code for
2107  * that bookkeeping.
2108  */
/*
 * NOTE(review): in this listing the return-type and declarator lines are
 * absent between the header comment and the opening brace, and the embedded
 * listing numbers skip 2116 and 2126.  As shown, no statement assigns
 * oldestRunningXid before the loop compares against it; the "Read nextXid"
 * comment below suggests the dropped line 2126 did so -- confirm against the
 * upstream file before relying on this text.
 */
2111 {
2112  ProcArrayStruct *arrayP = procArray;
2113  TransactionId oldestRunningXid;
2114  int index;
2115 
2117 
2118  /*
2119  * Read nextXid, as the upper bound of what's still active.
2120  *
2121  * Reading a TransactionId is atomic, but we must grab the lock to make
2122  * sure that all XIDs < nextXid are already present in the proc array (or
2123  * have already completed), when we spin over it.
2124  */
2125  LWLockAcquire(XidGenLock, LW_SHARED);
2127  LWLockRelease(XidGenLock);
2128 
 /*
  * XidGenLock has already been released at this point; the scan below runs
  * under shared ProcArrayLock only.
  */
2129  /*
2130  * Spin over procArray tracking the minimum top-level xid; subxids are not read.
2131  */
2132  LWLockAcquire(ProcArrayLock, LW_SHARED);
2133  for (index = 0; index < arrayP->numProcs; index++)
2134  {
2135  int pgprocno = arrayP->pgprocnos[index];
2136  PGXACT *pgxact = &allPgXact[pgprocno];
2137  TransactionId xid;
2138 
2139  /* Fetch xid just once - see GetNewTransactionId */
2140  xid = UINT32_ACCESS_ONCE(pgxact->xid);
2141 
 /* Entries whose xid is not a normal XID do not take part in the minimum */
2142  if (!TransactionIdIsNormal(xid))
2143  continue;
2144 
2145  if (TransactionIdPrecedes(xid, oldestRunningXid))
2146  oldestRunningXid = xid;
2147 
2148  /*
2149  * Top-level XID of a transaction is always less than any of its
2150  * subxids, so we don't need to check if any of the subxids are
2151  * smaller than oldestRunningXid
2152  */
2153  }
2154  LWLockRelease(ProcArrayLock);
2155 
2156  return oldestRunningXid;
2157 }
2158 
2159 /*
2160  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2161  *
2162  * Returns the oldest xid that we can guarantee not to have been affected by
2163  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2164  * transaction aborted. Note that the value can (and most of the time will) be
2165  * much more conservative than what really has been affected by vacuum, but we
2166  * currently don't have better data available.
2167  *
2168  * This is useful to initialize the cutoff xid after which a new changeset
2169  * extraction replication slot can start decoding changes.
2170  *
2171  * Must be called with ProcArrayLock held either shared or exclusively,
2172  * although most callers will want to use exclusive mode since it is expected
2173  * that the caller will immediately use the xid to peg the xmin horizon.
2174  */
/*
 * NOTE(review): this listing is missing the return-type/declarator lines
 * (listing numbers 2175-2176) as well as lines 2194, 2204 and 2209-2210.
 * As shown, oldestSafeXid is never initialized and both slot-xmin "if"
 * conditions below are truncated (their closing "oldestSafeXid))" has no
 * matching call) -- confirm against the upstream file.  catalogOnly is used
 * below but not declared in the visible text; presumably it is a parameter.
 */
2177 {
2178  ProcArrayStruct *arrayP = procArray;
2179  TransactionId oldestSafeXid;
2180  int index;
2181  bool recovery_in_progress = RecoveryInProgress();
2182 
 /* Caller contract: ProcArrayLock must already be held (either mode) */
2183  Assert(LWLockHeldByMe(ProcArrayLock));
2184 
2185  /*
2186  * Acquire XidGenLock, so no transactions can acquire an xid while we're
2187  * running. If no transaction with xid were running concurrently a new xid
2188  * could influence the RecentXmin et al.
2189  *
2190  * We initialize the computation to nextXid since that's guaranteed to be
2191  * a safe, albeit pessimal, value.
2192  */
2193  LWLockAcquire(XidGenLock, LW_SHARED);
2195 
2196  /*
2197  * If there's already a slot pegging the xmin horizon, we can start with
2198  * that value, it's guaranteed to be safe since it's computed by this
2199  * routine initially and has been enforced since. We can always use the
2200  * slot's general xmin horizon, but the catalog horizon is only usable
2201  * when only catalog data is going to be looked at.
2202  */
2203  if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2205  oldestSafeXid))
2206  oldestSafeXid = procArray->replication_slot_xmin;
2207 
2208  if (catalogOnly &&
2211  oldestSafeXid))
2212  oldestSafeXid = procArray->replication_slot_catalog_xmin;
2213 
2214  /*
2215  * If we're not in recovery, we walk over the procarray and collect the
2216  * lowest xid. Since we're called with ProcArrayLock held and have
2217  * acquired XidGenLock, no entries can vanish concurrently, since
2218  * PGXACT->xid is only set with XidGenLock held and only cleared with
2219  * ProcArrayLock held.
2220  *
2221  * In recovery we can't lower the safe value besides what we've computed
2222  * above, so we'll have to wait a bit longer there. We unfortunately can
2223  * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2224  * machinery can miss values and return an older value than is safe.
2225  */
2226  if (!recovery_in_progress)
2227  {
2228  /*
2229  * Spin over procArray collecting all min(PGXACT->xid)
2230  */
2231  for (index = 0; index < arrayP->numProcs; index++)
2232  {
2233  int pgprocno = arrayP->pgprocnos[index];
2234  PGXACT *pgxact = &allPgXact[pgprocno];
2235  TransactionId xid;
2236 
2237  /* Fetch xid just once - see GetNewTransactionId */
2238  xid = UINT32_ACCESS_ONCE(pgxact->xid);
2239 
2240  if (!TransactionIdIsNormal(xid))
2241  continue;
2242 
2243  if (TransactionIdPrecedes(xid, oldestSafeXid))
2244  oldestSafeXid = xid;
2245  }
2246  }
2247 
 /* Only XidGenLock is released here; ProcArrayLock stays with the caller */
2248  LWLockRelease(XidGenLock);
2249 
2250  return oldestSafeXid;
2251 }
2252 
2253 /*
2254  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2255  * delaying checkpoint because they have critical actions in progress.
2256  *
2257  * Constructs an array of VXIDs of transactions that are currently in commit
2258  * critical sections, as shown by having delayChkpt set in their PGPROC.
2259  *
2260  * Returns a palloc'd array that should be freed by the caller.
2261  * *nvxids is the number of valid entries.
2262  *
2263  * Note that because backends set or clear delayChkpt without holding any lock,
2264  * the result is somewhat indeterminate, but we don't really care. Even in
2265  * a multiprocessor with delayed writes to shared memory, it should be certain
2266  * that setting of delayChkpt will propagate to shared memory when the backend
2267  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2268  * it's already inserted its commit record. Whether it takes a little while
2269  * for clearing of delayChkpt to propagate is unimportant for correctness.
2270  */
/*
 * NOTE(review): the return-type and declarator lines (listing numbers
 * 2271-2272) are absent from this listing.  nvxids is written through below
 * but not declared in the visible text; presumably it is an output
 * parameter -- confirm against the upstream file.
 */
2273 {
2274  VirtualTransactionId *vxids;
2275  ProcArrayStruct *arrayP = procArray;
2276  int count = 0;
2277  int index;
2278 
 /*
  * The result is sized by maxProcs and allocated before the lock is taken,
  * while the scan below is bounded by numProcs (presumably never larger
  * than maxProcs -- verify).
  */
2279  /* allocate what's certainly enough result space */
2280  vxids = (VirtualTransactionId *)
2281  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2282 
2283  LWLockAcquire(ProcArrayLock, LW_SHARED);
2284 
2285  for (index = 0; index < arrayP->numProcs; index++)
2286  {
2287  int pgprocno = arrayP->pgprocnos[index];
2288  PGPROC *proc = &allProcs[pgprocno];
2289 
 /* Only entries currently flagged delayChkpt are reported */
2290  if (proc->delayChkpt)
2291  {
2292  VirtualTransactionId vxid;
2293 
2294  GET_VXID_FROM_PGPROC(vxid, *proc);
 /* Flagged entries without a valid VXID are left out of the result */
2295  if (VirtualTransactionIdIsValid(vxid))
2296  vxids[count++] = vxid;
2297  }
2298  }
2299 
2300  LWLockRelease(ProcArrayLock);
2301 
 /* Hand back the entry count alongside the palloc'd array */
2302  *nvxids = count;
2303  return vxids;
2304 }
2305 
2306 /*
2307  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2308  *
2309  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2310  * of the specified VXIDs are still in critical sections of code.
2311  *
2312  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2313  * those numbers should be small enough for it not to be a problem.
2314  */
/*
 * NOTE(review): the declarator line (listing number 2316, function name and
 * parameter list) is absent from this listing.  vxids and nvxids are used
 * below but not declared in the visible text; presumably they are the
 * parameters -- confirm against the upstream file.
 */
2315 bool
2317 {
2318  bool result = false;
2319  ProcArrayStruct *arrayP = procArray;
2320  int index;
2321 
2322  LWLockAcquire(ProcArrayLock, LW_SHARED);
2323 
2324  for (index = 0; index < arrayP->numProcs; index++)
2325  {
2326  int pgprocno = arrayP->pgprocnos[index];
2327  PGPROC *proc = &allProcs[pgprocno];
2328  VirtualTransactionId vxid;
2329 
2330  GET_VXID_FROM_PGPROC(vxid, *proc);
2331 
 /* Only procs that are delaying right now and have a valid VXID can match */
2332  if (proc->delayChkpt && VirtualTransactionIdIsValid(vxid))
2333  {
2334  int i;
2335 
 /* Linear search of the caller's list for this proc's VXID */
2336  for (i = 0; i < nvxids; i++)
2337  {
2338  if (VirtualTransactionIdEquals(vxid, vxids[i]))
2339  {
2340  result = true;
2341  break;
2342  }
2343  }
 /* One match is enough; stop scanning the proc array as well */
2344  if (result)
2345  break;
2346  }
2347  }
2348 
2349  LWLockRelease(ProcArrayLock);
2350 
2351  return result;
2352 }
2353 
2354 /*
2355  * BackendPidGetProc -- get a backend's PGPROC given its PID
2356  *
2357  * Returns NULL if not found. Note that it is up to the caller to be
2358  * sure that the question remains meaningful for long enough for the
2359  * answer to be used ...
2360  */
/*
 * NOTE(review): the declarator line (listing number 2362, function name and
 * parameter list) is absent from this listing; pid is presumably the sole
 * parameter -- confirm against the upstream file.
 */
2361 PGPROC *
2363 {
2364  PGPROC *result;
2365 
 /* Answered without touching ProcArrayLock */
2366  if (pid == 0) /* never match dummy PGPROCs */
2367  return NULL;
2368 
 /* Shared lock is held only for the duration of the lookup itself */
2369  LWLockAcquire(ProcArrayLock, LW_SHARED);
2370 
2371  result = BackendPidGetProcWithLock(pid);
2372 
2373  LWLockRelease(ProcArrayLock);
2374 
 /* The lock is no longer held when the pointer reaches the caller */
2375  return result;
2376 }
2377 
2378 /*
2379  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
2380  *
2381  * Same as above, except caller must be holding ProcArrayLock. The found
2382  * entry, if any, can be assumed to be valid as long as the lock remains held.
2383  */
/*
 * NOTE(review): the declarator line (listing number 2385, function name and
 * parameter list) is absent from this listing; pid is presumably the sole
 * parameter -- confirm against the upstream file.
 */
2384 PGPROC *
2386 {
2387  PGPROC *result = NULL;
2388  ProcArrayStruct *arrayP = procArray;
2389  int index;
2390 
2391  if (pid == 0) /* never match dummy PGPROCs */
2392  return NULL;
2393 
 /*
  * No lock is taken here; per the header comment the caller must already
  * hold ProcArrayLock.  The first entry with a matching pid wins.
  */
2394  for (index = 0; index < arrayP->numProcs; index++)
2395  {
2396  PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
2397 
2398  if (proc->pid == pid)
2399  {
2400  result = proc;
2401  break;
2402  }
2403  }
2404 
 /* Still NULL if no entry matched */
2405  return result;
2406 }
2407 
2408 /*
2409  * BackendXidGetPid -- get a backend's pid given its XID
2410  *
2411  * Returns 0 if not found or it's a prepared transaction. Note that
2412  * it is up to the caller to be sure that the question remains
2413  * meaningful for long enough for the answer to be used ...
2414  *
2415  * Only main transaction Ids are considered. This function is mainly
2416  * useful for determining what backend owns a lock.
2417  *
2418  * Beware that not every xact has an XID assigned. However, as long as you
2419  * only call this using an XID found on disk, you're safe.
2420  */
/*
 * NOTE(review): the declarator line (listing number 2422, function name and
 * parameter list) is absent from this listing; xid is presumably the sole
 * parameter -- confirm against the upstream file.
 */
2421 int
2423 {
2424  int result = 0;
2425  ProcArrayStruct *arrayP = procArray;
2426  int index;
2427 
2428  if (xid == InvalidTransactionId) /* never match invalid xid */
2429  return 0;
2430 
2431  LWLockAcquire(ProcArrayLock, LW_SHARED);
2432 
2433  for (index = 0; index < arrayP->numProcs; index++)
2434  {
2435  int pgprocno = arrayP->pgprocnos[index];
2436  PGPROC *proc = &allProcs[pgprocno];
2437  PGXACT *pgxact = &allPgXact[pgprocno];
2438 
 /*
  * Only the top-level xid is compared; subxids are not consulted.  The
  * first match ends the search, and its pid is returned as-is (so an
  * entry whose pid is 0 yields the same value as "not found").
  */
2439  if (pgxact->xid == xid)
2440  {
2441  result = proc->pid;
2442  break;
2443  }
2444  }
2445 
2446  LWLockRelease(ProcArrayLock);
2447 
2448  return result;
2449 }
2450 
2451 /*
2452  * IsBackendPid -- is a given pid a running backend
2453  *
2454  * This is not called by the backend, but is called by external modules.
2455  */
/*
 * NOTE(review): the declarator line (listing number 2457, function name and
 * parameter list) is absent from this listing; pid is presumably the sole
 * parameter -- confirm against the upstream file.
 */
2456 bool
2458 {
 /* True exactly when BackendPidGetProc finds an entry for this pid */
2459  return (BackendPidGetProc(pid) != NULL);
2460 }
2461 
2462 
2463 /*
2464  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
2465  *
2466  * The array is palloc'd. The number of valid entries is returned into *nvxids.
2467  *
2468  * The arguments allow filtering the set of VXIDs returned. Our own process
2469  * is always skipped. In addition:
2470  * If limitXmin is not InvalidTransactionId, skip processes with
2471  * xmin > limitXmin.
2472  * If excludeXmin0 is true, skip processes with xmin = 0.
2473  * If allDbs is false, skip processes attached to other databases.
2474  * If excludeVacuum isn't zero, skip processes for which
2475  * (vacuumFlags & excludeVacuum) is not zero.
2476  *
2477  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
2478  * allow skipping backends whose oldest live snapshot is no older than
2479  * some snapshot we have. Since we examine the procarray with only shared
2480  * lock, there are race conditions: a backend could set its xmin just after
2481  * we look. Indeed, on multiprocessors with weak memory ordering, the
2482  * other backend could have set its xmin *before* we look. We know however
2483  * that such a backend must have held shared ProcArrayLock overlapping our
2484  * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
2485  * any snapshot the other backend is taking concurrently with our scan cannot
2486  * consider any transactions as still running that we think are committed
2487  * (since backends must hold ProcArrayLock exclusive to commit).
2488  */
2490 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
2491  bool allDbs, int excludeVacuum,
2492  int *nvxids)
2493 {
2494  VirtualTransactionId *vxids;
2495  ProcArrayStruct *arrayP = procArray;
2496  int count = 0;
2497  int index;
2498 
2499  /* allocate what's certainly enough result space */
2500  vxids = (VirtualTransactionId *)
2501  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2502 
2503  LWLockAcquire(ProcArrayLock, LW_SHARED);
2504 
2505  for (index = 0; index < arrayP->numProcs; index++)
2506  {
2507  int pgprocno = arrayP->pgprocnos[index];
2508  PGPROC *proc = &allProcs[pgprocno];
2509  PGXACT *pgxact = &allPgXact[pgprocno];
2510 
2511  if (proc == MyProc)
2512  continue;
2513 
2514  if (excludeVacuum & pgxact->vacuumFlags)
2515  continue;
2516 
2517  if (allDbs || proc->databaseId == MyDatabaseId)
2518  {
2519  /* Fetch xmin just once - might change on us */
2520  TransactionId pxmin = UINT32_ACCESS_ONCE(pgxact->xmin);
2521 
2522  if (excludeXmin0 && !TransactionIdIsValid(pxmin))
2523  continue;
2524 
2525  /*
2526  * InvalidTransactionId precedes all other XIDs, so a proc that
2527  * hasn't set xmin yet will not be rejected by this test.
2528  */
2529  if (!TransactionIdIsValid(limitXmin) ||
2530  TransactionIdPrecedesOrEquals(pxmin, limitXmin))
2531  {
2532  VirtualTransactionId vxid;
2533 
2534  GET_VXID_FROM_PGPROC(vxid, *proc);
2535  if (VirtualTransactionIdIsValid(vxid))
2536  vxids[count++] = vxid;
2537  }
2538  }
2539  }
2540 
2541  LWLockRelease(ProcArrayLock);
2542 
2543  *nvxids = count;
2544  return vxids;
2545 }
2546 
2547 /*
2548  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
2549  *
2550  * Usage is limited to conflict resolution during recovery on standby servers.
2551  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
2552  * in cases where we cannot accurately determine a value for latestRemovedXid.
2553  *
2554  * If limitXmin is InvalidTransactionId then we want to kill everybody,
2555  * so we're not worried if they have a snapshot or not, nor does it really
2556  * matter what type of lock we hold.
2557  *
2558  * All callers that are checking xmins always now supply a valid and useful
2559  * value for limitXmin. The limitXmin is always lower than the lowest
2560  * numbered KnownAssignedXid that is not already a FATAL error. This is
2561  * because we only care about cleanup records that are cleaning up tuple
2562  * versions from committed transactions. In that case they will only occur
2563  * at the point where the record is less than the lowest running xid. That
2564  * allows us to say that if any backend takes a snapshot concurrently with
2565  * us then the conflict assessment made here would never include the snapshot
2566  * that is being derived. So we take LW_SHARED on the ProcArray and allow
2567  * concurrent snapshots when limitXmin is valid. We might think about adding
2568  * Assert(limitXmin < lowest(KnownAssignedXids))
2569  * but that would not be true in the case of FATAL errors lagging in array,
2570  * but we already know those are bogus anyway, so we skip that test.
2571  *
2572  * If dbOid is valid we skip backends attached to other databases.
2573  *
2574  * Be careful to *not* pfree the result from this function. We reuse
2575  * this array sufficiently often that we use malloc for the result.
2576  */
/*
 * NOTE(review): the return-type and declarator lines (listing numbers
 * 2577-2578) and line 2642 are absent from this listing.  limitXmin and
 * dbOid are used below but not declared in the visible text; presumably they
 * are the parameters.  As shown, the terminator entry only has backendId
 * set -- confirm against the upstream file.
 */
2579 {
2580  static VirtualTransactionId *vxids;
2581  ProcArrayStruct *arrayP = procArray;
2582  int count = 0;
2583  int index;
2584 
2585  /*
2586  * If first time through, get workspace to hold the result VXIDs in. We
2587  * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
2588  * maxProcs entries, plus one more as room for a terminator.
2589  */
2590  if (vxids == NULL)
2591  {
2592  vxids = (VirtualTransactionId *)
2593  malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
2594  if (vxids == NULL)
2595  ereport(ERROR,
2596  (errcode(ERRCODE_OUT_OF_MEMORY),
2597  errmsg("out of memory")));
2598  }
2599 
2600  LWLockAcquire(ProcArrayLock, LW_SHARED);
2601 
2602  for (index = 0; index < arrayP->numProcs; index++)
2603  {
2604  int pgprocno = arrayP->pgprocnos[index];
2605  PGPROC *proc = &allProcs[pgprocno];
2606  PGXACT *pgxact = &allPgXact[pgprocno];
2607 
2608  /* Exclude prepared transactions */
2609  if (proc->pid == 0)
2610  continue;
2611 
 /* An invalid dbOid disables the per-database filter */
2612  if (!OidIsValid(dbOid) ||
2613  proc->databaseId == dbOid)
2614  {
2615  /* Fetch xmin just once - can't change on us, but good coding */
2616  TransactionId pxmin = UINT32_ACCESS_ONCE(pgxact->xmin);
2617 
2618  /*
2619  * We ignore an invalid pxmin because this means that backend has
2620  * no snapshot currently. We hold a Share lock to avoid contention
2621  * with users taking snapshots. That is not a problem because the
2622  * current xmin is always at least one higher than the latest
2623  * removed xid, so any new snapshot would never conflict with the
2624  * test here.
2625  */
 /*
  * With an invalid limitXmin every remaining backend qualifies; otherwise
  * only those with a valid xmin that does not follow limitXmin.
  */
2626  if (!TransactionIdIsValid(limitXmin) ||
2627  (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
2628  {
2629  VirtualTransactionId vxid;
2630 
2631  GET_VXID_FROM_PGPROC(vxid, *proc);
2632  if (VirtualTransactionIdIsValid(vxid))
2633  vxids[count++] = vxid;
2634  }
2635  }
2636  }
2637 
2638  LWLockRelease(ProcArrayLock);
2639 
2640  /* add the terminator */
2641  vxids[count].backendId = InvalidBackendId;
2643 
 /* Same static buffer on every call; the next call overwrites it */
2644  return vxids;
2645 }
2646 
2647 /*
2648  * CancelVirtualTransaction - used in recovery conflict processing
2649  *
2650  * Returns pid of the process signaled, or 0 if not found.
2651  */
/*
 * NOTE(review): the declarator line (listing number 2653, function name and
 * parameter list) is absent from this listing.  vxid and sigmode are used
 * below but not declared in the visible text; presumably they are the
 * parameters -- confirm against the upstream file.
 */
2652 pid_t
2654 {
2655  ProcArrayStruct *arrayP = procArray;
2656  int index;
2657  pid_t pid = 0;
2658 
2659  LWLockAcquire(ProcArrayLock, LW_SHARED);
2660 
2661  for (index = 0; index < arrayP->numProcs; index++)
2662  {
2663  int pgprocno = arrayP->pgprocnos[index];
2664  PGPROC *proc = &allProcs[pgprocno];
2665  VirtualTransactionId procvxid;
2666 
2667  GET_VXID_FROM_PGPROC(procvxid, *proc);
2668 
 /* Match on both VXID components; at most one entry is acted on */
2669  if (procvxid.backendId == vxid.backendId &&
2670  procvxid.localTransactionId == vxid.localTransactionId)
2671  {
 /*
  * The conflict flag is set even when pid turns out to be 0 and no signal
  * is sent.  Note this write happens while holding only the shared lock.
  */
2672  proc->recoveryConflictPending = true;
2673  pid = proc->pid;
2674  if (pid != 0)
2675  {
2676  /*
2677  * Kill the pid if it's still here. If not, that's what we
2678  * wanted so ignore any errors.
2679  */
2680  (void) SendProcSignal(pid, sigmode, vxid.backendId);
2681  }
2682  break;
2683  }
2684  }
2685 
2686  LWLockRelease(ProcArrayLock);
2687 
 /* Remains 0 when no entry matched (or the matching entry had pid 0) */
2688  return pid;
2689 }
2690 
2691 /*
2692  * MinimumActiveBackends --- count backends (other than myself) that are
2693  * in active transactions. Return true if the count exceeds the
2694  * minimum threshold passed. This is used as a heuristic to decide if
2695  * a pre-XLOG-flush delay is worthwhile during commit.
2696  *
2697  * Do not count backends that are blocked waiting for locks, since they are
2698  * not going to get to run until someone else commits.
2699  */
/*
 * NOTE(review): the declarator line (listing number 2701, function name and
 * parameter list) is absent from this listing; min is used below and is
 * presumably the sole parameter -- confirm against the upstream file.
 */
2700 bool
2702 {
2703  ProcArrayStruct *arrayP = procArray;
2704  int count = 0;
2705  int index;
2706 
2707  /* Quick short-circuit if no minimum is specified */
2708  if (min == 0)
2709  return true;
2710 
2711  /*
2712  * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
2713  * bogus, but since we are only testing fields for zero or nonzero, it
2714  * should be OK. The result is only used for heuristic purposes anyway...
2715  */
2716  for (index = 0; index < arrayP->numProcs; index++)
2717  {
 /*
  * proc and pgxact are only address computations here; neither is
  * dereferenced until after the pgprocno == -1 check below.
  */
2718  int pgprocno = arrayP->pgprocnos[index];
2719  PGPROC *proc = &allProcs[pgprocno];
2720  PGXACT *pgxact = &allPgXact[pgprocno];
2721 
2722  /*
2723  * Since we're not holding a lock, need to be prepared to deal with
2724  * garbage, as someone could have incremented numProcs but not yet
2725  * filled the structure.
2726  *
2727  * If someone just decremented numProcs, 'proc' could also point to a
2728  * PGPROC entry that's no longer in the array. It still points to a
2729  * PGPROC struct, though, because freed PGPROC entries just go to the
2730  * free list and are recycled. Its contents are nonsense in that case,
2731  * but that's acceptable for this function.
2732  */
2733  if (pgprocno == -1)
2734  continue; /* do not count deleted entries */
2735  if (proc == MyProc)
2736  continue; /* do not count myself */
2737  if (pgxact->xid == InvalidTransactionId)
2738  continue; /* do not count if no XID assigned */
2739  if (proc->pid == 0)
2740  continue; /* do not count prepared xacts */
2741  if (proc->waitLock != NULL)
2742  continue; /* do not count if blocked on a lock */
2743  count++;
 /* Stop scanning as soon as the threshold is reached */
2744  if (count >= min)
2745  break;
2746  }
2747 
2748  return count >= min;
2749 }
2750 
2751 /*
2752  * CountDBBackends --- count backends that are using specified database
2753  */
/*
 * NOTE(review): the declarator line (listing number 2755, function name and
 * parameter list) is absent from this listing; databaseid is used below and
 * is presumably the sole parameter -- confirm against the upstream file.
 */
2754 int
2756 {
2757  ProcArrayStruct *arrayP = procArray;
2758  int count = 0;
2759  int index;
2760 
2761  LWLockAcquire(ProcArrayLock, LW_SHARED);
2762 
2763  for (index = 0; index < arrayP->numProcs; index++)
2764  {
2765  int pgprocno = arrayP->pgprocnos[index];
2766  PGPROC *proc = &allProcs[pgprocno];
2767 
2768  if (proc->pid == 0)
2769  continue; /* do not count prepared xacts */
 /* An invalid databaseid counts backends of every database */
2770  if (!OidIsValid(databaseid) ||
2771  proc->databaseId == databaseid)
2772  count++;
2773  }
2774 
2775  LWLockRelease(ProcArrayLock);
2776 
2777  return count;
2778 }
2779 
2780 /*
2781  * CountDBConnections --- counts database backends ignoring any background
2782  * worker processes
2783  */
/*
 * NOTE(review): the declarator line (listing number 2785, function name and
 * parameter list) is absent from this listing; databaseid is used below and
 * is presumably the sole parameter -- confirm against the upstream file.
 */
2784 int
2786 {
2787  ProcArrayStruct *arrayP = procArray;
2788  int count = 0;
2789  int index;
2790 
2791  LWLockAcquire(ProcArrayLock, LW_SHARED);
2792 
2793  for (index = 0; index < arrayP->numProcs; index++)
2794  {
2795  int pgprocno = arrayP->pgprocnos[index];
2796  PGPROC *proc = &allProcs[pgprocno];
2797 
2798  if (proc->pid == 0)
2799  continue; /* do not count prepared xacts */
2800  if (proc->isBackgroundWorker)
2801  continue; /* do not count background workers */
 /* An invalid databaseid counts connections to every database */
2802  if (!OidIsValid(databaseid) ||
2803  proc->databaseId == databaseid)
2804  count++;
2805  }
2806 
2807  LWLockRelease(ProcArrayLock);
2808 
2809  return count;
2810 }
2811 
2812 /*
2813  * CancelDBBackends --- cancel backends that are using specified database
2814  */
2815 void
2816 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
2817 {
2818  ProcArrayStruct *arrayP = procArray;
2819  int index;
2820  pid_t pid = 0;
2821 
2822  /* tell all backends to die */
2823  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2824 
2825  for (index = 0; index < arrayP->numProcs; index++)
2826  {
2827  int pgprocno = arrayP->pgprocnos[index];
2828  PGPROC *proc = &allProcs[pgprocno];
2829 
2830  if (databaseid == InvalidOid || proc->databaseId == databaseid)
2831  {
2832  VirtualTransactionId procvxid;
2833 
2834  GET_VXID_FROM_PGPROC(procvxid, *proc);
2835 
2836  proc->recoveryConflictPending = conflictPending;
2837  pid = proc->pid;
2838  if (pid != 0)
2839  {
2840  /*
2841  * Kill the pid if it's still here. If not, that's what we
2842  * wanted so ignore any errors.
2843  */
2844  (void) SendProcSignal(pid, sigmode, procvxid.backendId);
2845  }
2846  }
2847  }
2848 
2849  LWLockRelease(ProcArrayLock);
2850 }
2851 
2852 /*
2853  * CountUserBackends --- count backends that are used by specified user
2854  */
/*
 * NOTE(review): the declarator line (listing number 2856, function name and
 * parameter list) is absent from this listing; roleid is used below and is
 * presumably the sole parameter -- confirm against the upstream file.
 */
2855 int
2857 {
2858  ProcArrayStruct *arrayP = procArray;
2859  int count = 0;
2860  int index;
2861 
2862  LWLockAcquire(ProcArrayLock, LW_SHARED);
2863 
2864  for (index = 0; index < arrayP->numProcs; index++)
2865  {
2866  int pgprocno = arrayP->pgprocnos[index];
2867  PGPROC *proc = &allProcs[pgprocno];
2868 
2869  if (proc->pid == 0)
2870  continue; /* do not count prepared xacts */
2871  if (proc->isBackgroundWorker)
2872  continue; /* do not count background workers */
 /* Exact role match only; there is no "all roles" wildcard here */
2873  if (proc->roleId == roleid)
2874  count++;
2875  }
2876 
2877  LWLockRelease(ProcArrayLock);
2878 
2879  return count;
2880 }
2881 
2882 /*
2883  * CountOtherDBBackends -- check for other backends running in the given DB
2884  *
2885  * If there are other backends in the DB, we will wait a maximum of 5 seconds
2886  * for them to exit. Autovacuum backends are encouraged to exit early by
2887  * sending them SIGTERM, but normal user backends are just waited for.
2888  *
2889  * The current backend is always ignored; it is caller's responsibility to
2890  * check whether the current backend uses the given DB, if it's important.
2891  *
2892  * Returns true if there are (still) other backends in the DB, false if not.
2893  * Also, *nbackends and *nprepared are set to the number of other backends
2894  * and prepared transactions in the DB, respectively.
2895  *
2896  * This function is used to interlock DROP DATABASE and related commands
2897  * against there being any active backends in the target DB --- dropping the
2898  * DB while active backends remain would be a Bad Thing. Note that we cannot
2899  * detect here the possibility of a newly-started backend that is trying to
2900  * connect to the doomed database, so additional interlocking is needed during
2901  * backend startup. The caller should normally hold an exclusive lock on the
2902  * target DB before calling this, which is one reason we mustn't wait
2903  * indefinitely.
2904  */
2905 bool
2906 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
2907 {
2908  ProcArrayStruct *arrayP = procArray;
2909 
2910 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
2911  int autovac_pids[MAXAUTOVACPIDS];
2912  int tries;
2913 
2914  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
2915  for (tries = 0; tries < 50; tries++)
2916  {
2917  int nautovacs = 0;
2918  bool found = false;
2919  int index;
2920 
 /*
  * NOTE(review): the embedded listing numbers skip 2921 here, so a
  * statement at the top of each retry may be missing from this listing --
  * confirm against the upstream file.
  */
2922 
 /* Counters are reset on every try, so they describe the latest scan only */
2923  *nbackends = *nprepared = 0;
2924 
2925  LWLockAcquire(ProcArrayLock, LW_SHARED);
2926 
2927  for (index = 0; index < arrayP->numProcs; index++)
2928  {
2929  int pgprocno = arrayP->pgprocnos[index];
2930  PGPROC *proc = &allProcs[pgprocno];
2931  PGXACT *pgxact = &allPgXact[pgprocno];
2932 
2933  if (proc->databaseId != databaseId)
2934  continue;
2935  if (proc == MyProc)
2936  continue;
2937 
2938  found = true;
2939 
 /* pid == 0 marks a prepared transaction rather than a live backend */
2940  if (proc->pid == 0)
2941  (*nprepared)++;
2942  else
2943  {
2944  (*nbackends)++;
 /* Remember at most MAXAUTOVACPIDS autovacuum pids to signal after unlocking */
2945  if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
2946  nautovacs < MAXAUTOVACPIDS)
2947  autovac_pids[nautovacs++] = proc->pid;
2948  }
2949  }
2950 
2951  LWLockRelease(ProcArrayLock);
2952 
2953  if (!found)
2954  return false; /* no conflicting backends, so done */
2955 
2956  /*
2957  * Send SIGTERM to any conflicting autovacuums before sleeping. We
2958  * postpone this step until after the loop because we don't want to
2959  * hold ProcArrayLock while issuing kill(). We have no idea what might
2960  * block kill() inside the kernel...
2961  */
2962  for (index = 0; index < nautovacs; index++)
2963  (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
2964 
2965  /* sleep, then try again */
2966  pg_usleep(100 * 1000L); /* 100ms */
2967  }
2968 
2969  return true; /* timed out, still conflicts */
2970 }
2971 
2972 /*
2973  * Terminate existing connections to the specified database. This routine
2974  * is used by the DROP DATABASE command when the user has asked to forcefully
2975  * drop the database.
2976  *
2977  * The current backend is always ignored; it is the caller's responsibility to
2978  * check whether the current backend uses the given DB, if it's important.
2979  *
2980  * No connection is terminated if even one prepared transaction exists in
2981  * the target database; an ERRCODE_OBJECT_IN_USE error is raised instead.
 *
 * The work is done in three passes:
 *
 * 1. Under shared ProcArrayLock, collect the PIDs of all other processes
 *    attached to databaseId, and count entries with pid == 0 (entries that
 *    represent prepared transactions).
 * 2. For each collected PID that still maps to a PGPROC, verify that the
 *    current user is allowed to signal it. Any failure raises
 *    ERRCODE_INSUFFICIENT_PRIVILEGE before a single signal has been sent.
 * 3. Send SIGTERM to each collected PID that still maps to a PGPROC.
 *    Errors from kill() are ignored.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this listing; the body reads a "databaseId" parameter.
2982  */
2983 void
2985 {
2986  ProcArrayStruct *arrayP = procArray;
2987  List *pids = NIL;
2988  int nprepared = 0;
2989  int i;
2990 
2991  LWLockAcquire(ProcArrayLock, LW_SHARED);
2992 
 /* arrayP aliases procArray, so this bound is the same numProcs */
2993  for (i = 0; i < procArray->numProcs; i++)
2994  {
2995  int pgprocno = arrayP->pgprocnos[i];
2996  PGPROC *proc = &allProcs[pgprocno];
2997 
2998  if (proc->databaseId != databaseId)
2999  continue;
3000  if (proc == MyProc)
3001  continue;
3002 
 /* pid == 0 marks an entry representing a prepared transaction */
3003  if (proc->pid != 0)
3004  pids = lappend_int(pids, proc->pid);
3005  else
3006  nprepared++;
3007  }
3008 
3009  LWLockRelease(ProcArrayLock);
3010 
 /* Give up before signalling anyone if a prepared transaction was seen */
3011  if (nprepared > 0)
3012  ereport(ERROR,
3013  (errcode(ERRCODE_OBJECT_IN_USE),
3014  errmsg("database \"%s\" is being used by prepared transaction",
3015  get_database_name(databaseId)),
3016  errdetail_plural("There is %d prepared transaction using the database.",
3017  "There are %d prepared transactions using the database.",
3018  nprepared,
3019  nprepared)));
3020 
3021  if (pids)
3022  {
3023  ListCell *lc;
3024 
3025  /*
3026  * Check whether we have the necessary rights to terminate other
3027  * sessions. We don't terminate any session until we ensure that we
3028  * have rights on all the sessions to be terminated. These checks are
3029  * the same as we do in pg_terminate_backend.
3030  *
3031  * In this case we don't raise some warnings - like "PID %d is not a
3032  * PostgreSQL server process", because for us already finished session
3033  * is not a problem.
3034  */
3035  foreach(lc, pids)
3036  {
3037  int pid = lfirst_int(lc);
 /* Re-resolve the PID; a session that has already exited yields NULL */
3038  PGPROC *proc = BackendPidGetProc(pid);
3039 
3040  if (proc != NULL)
3041  {
3042  /* Only allow superusers to signal superuser-owned backends. */
3043  if (superuser_arg(proc->roleId) && !superuser())
3044  ereport(ERROR,
3045  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3046  errmsg("must be a superuser to terminate superuser process")));
3047 
3048  /* Users can signal backends they have role membership in. */
3049  if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3050  !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
3051  ereport(ERROR,
3052  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3053  errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
3054  }
3055  }
3056 
3057  /*
3058  * There's a race condition here: once we release the ProcArrayLock,
3059  * it's possible for the session to exit before we issue kill. That
3060  * race condition possibility seems too unlikely to worry about. See
3061  * pg_signal_backend.
3062  */
3063  foreach(lc, pids)
3064  {
3065  int pid = lfirst_int(lc);
 /* Look the PID up again; sessions that are gone are silently skipped */
3066  PGPROC *proc = BackendPidGetProc(pid);
3067 
3068  if (proc != NULL)
3069  {
3070  /*
3071  * If we have setsid(), signal the backend's whole process
3072  * group
3073  */
3074 #ifdef HAVE_SETSID
 /* a negative PID makes kill() target the process group */
3075  (void) kill(-pid, SIGTERM);
3076 #else
3077  (void) kill(pid, SIGTERM);
3078 #endif
3079  }
3080  }
3081  }
3082 }
3083 
3084 /*
3085  * ProcArraySetReplicationSlotXmin
3086  *
3087  * Install limits to future computations of the xmin horizon to prevent vacuum
3088  * and HOT pruning from removing affected rows still needed by clients with
3089  * replication slots.
 *
 * Stores "xmin" into procArray->replication_slot_xmin and "catalog_xmin"
 * into procArray->replication_slot_catalog_xmin. Both fields are always
 * overwritten.
 *
 * If "already_locked" is false, ProcArrayLock is taken in exclusive mode
 * around the two stores and released afterwards. If it is true, the caller
 * must already hold ProcArrayLock (checked by the Assert) and the lock is
 * left held on return.
 *
 * NOTE(review): the first declarator line (function name and the leading
 * parameters) is absent from this listing; the body reads "xmin" and
 * "catalog_xmin".
3090  */
3091 void
3093  bool already_locked)
3094 {
 /* The Assert checks only that the lock is held, not in which mode */
3095  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3096 
3097  if (!already_locked)
3098  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3099 
3100  procArray->replication_slot_xmin = xmin;
3101  procArray->replication_slot_catalog_xmin = catalog_xmin;
3102 
 /* Release only a lock that we acquired ourselves */
3103  if (!already_locked)
3104  LWLockRelease(ProcArrayLock);
3105 }
3106 
3107 /*
3108  * ProcArrayGetReplicationSlotXmin
3109  *
3110  * Return the current slot xmin limits. That's useful to be able to remove
3111  * data that's older than those limits.
 *
 * Both output pointers are optional: "*xmin" receives
 * procArray->replication_slot_xmin and "*catalog_xmin" receives
 * procArray->replication_slot_catalog_xmin, and a NULL pointer skips the
 * corresponding value. The two fields are read under one shared hold of
 * ProcArrayLock.
 *
 * NOTE(review): the first declarator line (function name and the "xmin"
 * parameter) is absent from this listing.
3112  */
3113 void
3115  TransactionId *catalog_xmin)
3116 {
3117  LWLockAcquire(ProcArrayLock, LW_SHARED);
3118 
3119  if (xmin != NULL)
3120  *xmin = procArray->replication_slot_xmin;
3121 
3122  if (catalog_xmin != NULL)
3123  *catalog_xmin = procArray->replication_slot_catalog_xmin;
3124 
3125  LWLockRelease(ProcArrayLock);
3126 }
3127 
3128 
/*
 * XidCacheRemove(i)
 *
 * Drop slot "i" from this backend's cached subtransaction XID array
 * (MyProc->subxids.xids) by overwriting it with the last cached entry and
 * then decrementing MyPgXact->nxids. The relative order of the remaining
 * entries is therefore not preserved.
 *
 * A write barrier separates the overwrite from the decrement, so the slot
 * is rewritten before the count is reduced.
 *
 * The macro takes no lock itself, and "i" is evaluated exactly once.
 * NOTE(review): presumably requires 0 <= i < MyPgXact->nxids; nothing
 * here checks it.
 */
3129 #define XidCacheRemove(i) \
3130  do { \
3131  MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
3132  pg_write_barrier(); \
3133  MyPgXact->nxids--; \
3134  } while (0)
3135 
3136 /*
3137  * XidCacheRemoveRunningXids
3138  *
3139  * Remove a bunch of TransactionIds from the list of known-running
3140  * subtransactions for my backend. Both the specified xid and those in
3141  * the xids[] array (of length nxids) are removed from the subxids cache.
3142  * latestXid must be the latest XID among the group.
3143  */
3144 void
3146  int nxids, const TransactionId *xids,
3147  TransactionId latestXid)
3148 {
3149  int i,
3150  j;
3151 
3153 
3154  /*
3155  * We must hold ProcArrayLock exclusively in order to remove transactions
3156  * from the PGPROC array. (See src/backend/access/transam/README.) It's
3157  * possible this could be relaxed since we know this routine is only used
3158  * to abort subtransactions, but pending closer analysis we'd best be
3159  * conservative.
3160  *
3161  * Note that we do not have to be careful about memory ordering of our own
3162  * reads wrt. GetNewTransactionId() here - only this process can modify
3163  * relevant fields of MyProc/MyPgXact. But we do have to be careful about
3164  * our own writes being well ordered.
3165  */
3166  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3167 
3168  /*
3169  * Under normal circumstances xid and xids[] will be in increasing order,
3170  * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3171  * behavior when removing a lot of xids.
3172  */
3173  for (i = nxids - 1; i >= 0; i--)
3174  {
3175  TransactionId anxid = xids[i];
3176 
3177  for (j = MyPgXact->nxids - 1; j >= 0; j--)
3178  {
3179  if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3180  {
3181  XidCacheRemove(j);
3182  break;
3183  }
3184  }
3185 
3186  /*
3187  * Ordinarily we should have found it, unless the cache has
3188  * overflowed. However it's also possible for this routine to be
3189  * invoked multiple times for the same subtransaction, in case of an
3190  * error during AbortSubTransaction. So instead of Assert, emit a
3191  * debug warning.
3192  */
3193  if (j < 0 && !MyPgXact->overflowed)
3194  elog(WARNING, "did not find subXID %u in MyProc", anxid);
3195  }
3196 
3197  for (j = MyPgXact->nxids - 1; j >= 0; j--)
3198  {
3199  if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3200  {
3201  XidCacheRemove(j);
3202  break;
3203  }
3204  }
3205  /* Ordinarily we should have found it, unless the cache has overflowed */
3206  if (j < 0 && !MyPgXact->overflowed)
3207  elog(WARNING, "did not find subXID %u in MyProc", xid);
3208 
3209  /* Also advance global latestCompletedXid while holding the lock */
3211  latestXid))
3213 
3214  LWLockRelease(ProcArrayLock);
3215 }
3216 
3217 #ifdef XIDCACHE_DEBUG
3218 
3219 /*
3220  * Print stats about effectiveness of XID cache
 *
 * Writes a single line to stderr containing the nine xc_* counters, each
 * labelled and formatted with %ld, in the order given by the format string.
 * Takes no arguments, returns nothing, and does not modify the counters.
 *
 * NOTE(review): %ld assumes every xc_* counter is a long; their
 * declarations are outside this view - confirm.
3221  */
3222 static void
3223 DisplayXidCache(void)
3224 {
3225  fprintf(stderr,
3226  "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
3227  xc_by_recent_xmin,
3228  xc_by_known_xact,
3229  xc_by_my_xact,
3230  xc_by_latest_xid,
3231  xc_by_main_xid,
3232  xc_by_child_xid,
3233  xc_by_known_assigned,
3234  xc_no_overflow,
3235  xc_slow_answer);
3236 }
3237 #endif /* XIDCACHE_DEBUG */
3238 
3239 
3240 /* ----------------------------------------------
3241  * KnownAssignedTransactionIds sub-module
3242  * ----------------------------------------------
3243  */
3244 
3245 /*
3246  * In Hot Standby mode, we maintain a list of transactions that are (or were)
3247  * running in the master at the current point in WAL. These XIDs must be
3248  * treated as running by standby transactions, even though they are not in
3249  * the standby server's PGXACT array.
3250  *
3251  * We record all XIDs that we know have been assigned. That includes all the
3252  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
3253  * been assigned. We can deduce the existence of unobserved XIDs because we
3254  * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
3255  * list expands as new XIDs are observed or inferred, and contracts when
3256  * transaction completion records arrive.
3257  *
3258  * During hot standby we do not fret too much about the distinction between
3259  * top-level XIDs and subtransaction XIDs. We store both together in the
3260  * KnownAssignedXids list. In backends, this is copied into snapshots in
3261  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
3262  * doesn't care about the distinction either. Subtransaction XIDs are
3263  * effectively treated as top-level XIDs and in the typical case pg_subtrans
3264  * links are *not* maintained (which does not affect visibility).
3265  *
3266  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
3267  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must
3268  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
3269  * least once per PGPROC_MAX_CACHED_SUBXIDS subXIDs. When we receive one of these
3270  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
3271  * and then remove them from KnownAssignedXids. This prevents overflow of
3272  * KnownAssignedXids and snapshots, at the cost that status checks for these
3273  * subXIDs will take a slower path through TransactionIdIsInProgress().
3274  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
3275  * though it should be complete for top-level XIDs; this is the same situation
3276  * that holds with respect to the PGPROC entries in normal running.
3277  *
3278  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
3279  * that, similarly to tracking overflow of a PGPROC's subxids array. We do
3280  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
3281  * As long as that is within the range of interesting XIDs, we have to assume
3282  * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
3283  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
3284  * subXID arrives - that is not an error.)
3285  *
3286  * Should a backend on primary somehow disappear before it can write an abort
3287  * record, then we just leave those XIDs in KnownAssignedXids. They actually
3288  * aborted but we think they were running; the distinction is irrelevant
3289  * because either way any changes done by the transaction are not visible to
3290  * backends in the standby. We prune KnownAssignedXids when
3291  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
3292  * array due to such dead XIDs.
3293  */
3294 
3295 /*
3296  * RecordKnownAssignedTransactionIds
3297  * Record the given XID in KnownAssignedXids, as well as any preceding
3298  * unobserved XIDs.
3299  *
3300  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
3301  * associated with a transaction. Must be called for each record after we
3302  * have executed StartupCLOG() et al, since we must ExtendSUBTRANS() etc.
3303  *
3304  * Called during recovery in analogy with and in place of GetNewTransactionId()
3305  */
3306 void
3308 {
3312 
3313  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
3314  xid, latestObservedXid);
3315 
3316  /*
3317  * When a newly observed xid arrives, it is frequently the case that it is
3318  * *not* the next xid in sequence. When this occurs, we must treat the
3319  * intervening xids as running also.
3320  */
3322  {
3323  TransactionId next_expected_xid;
3324 
3325  /*
3326  * Extend subtrans like we do in GetNewTransactionId() during normal
3327  * operation using individual extend steps. Note that we do not need
3328  * to extend clog since its extensions are WAL logged.
3329  *
3330  * This part has to be done regardless of standbyState since we
3331  * immediately start assigning subtransactions to their toplevel
3332  * transactions.
3333  */
3334  next_expected_xid = latestObservedXid;
3335  while (TransactionIdPrecedes(next_expected_xid, xid))
3336  {
3337  TransactionIdAdvance(next_expected_xid);
3338  ExtendSUBTRANS(next_expected_xid);
3339  }
3340  Assert(next_expected_xid == xid);
3341 
3342  /*
3343  * If the KnownAssignedXids machinery isn't up yet, there's nothing
3344  * more to do since we don't track assigned xids yet.
3345  */
3347  {
3348  latestObservedXid = xid;
3349  return;
3350  }
3351 
3352  /*
3353  * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
3354  */
3355  next_expected_xid = latestObservedXid;
3356  TransactionIdAdvance(next_expected_xid);
3357  KnownAssignedXidsAdd(next_expected_xid, xid, false);
3358 
3359  /*
3360  * Now we can advance latestObservedXid
3361  */
3362  latestObservedXid = xid;
3363 
3364  /* ShmemVariableCache->nextFullXid must be beyond any observed xid */
3366  next_expected_xid = latestObservedXid;
3367  TransactionIdAdvance(next_expected_xid);
3368  }
3369 }
3370 
3371 /*
3372  * ExpireTreeKnownAssignedTransactionIds
3373  * Remove the given XIDs from KnownAssignedXids.
3374  *
3375  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
3376  */
3377 void
3379  TransactionId *subxids, TransactionId max_xid)
3380 {
3382 
3383  /*
3384  * Uses same locking as transaction commit
3385  */
3386  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3387 
3388  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
3389 
3390  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
3392  max_xid))
3394 
3395  LWLockRelease(ProcArrayLock);
3396 }
3397 
3398 /*
3399  * ExpireAllKnownAssignedTransactionIds
3400  * Remove all entries in KnownAssignedXids
3401  */
3402 void
3404 {
3405  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3407  LWLockRelease(ProcArrayLock);
3408 }
3409 
3410 /*
3411  * ExpireOldKnownAssignedTransactionIds
3412  * Remove KnownAssignedXids entries preceding the given XID
3413  */
3414 void
3416 {
3417  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3419  LWLockRelease(ProcArrayLock);
3420 }
3421 
3422 
3423 /*
3424  * Private module functions to manipulate KnownAssignedXids
3425  *
3426  * There are 5 main uses of the KnownAssignedXids data structure:
3427  *
3428  * * backends taking snapshots - all valid XIDs need to be copied out
3429  * * backends seeking to determine presence of a specific XID
3430  * * startup process adding new known-assigned XIDs
3431  * * startup process removing specific XIDs as transactions end
3432  * * startup process pruning array when special WAL records arrive
3433  *
3434  * This data structure is known to be a hot spot during Hot Standby, so we
3435  * go to some lengths to make these operations as efficient and as concurrent
3436  * as possible.
3437  *
3438  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
3439  * order, to be exact --- to allow binary search for specific XIDs. Note:
3440  * in general TransactionIdPrecedes would not provide a total order, but
3441  * we know that the entries present at any instant should not extend across
3442  * a large enough fraction of XID space to wrap around (the master would
3443  * shut down for fear of XID wrap long before that happens). So it's OK to
3444  * use TransactionIdPrecedes as a binary-search comparator.
3445  *
3446  * It's cheap to maintain the sortedness during insertions, since new known
3447  * XIDs are always reported in XID order; we just append them at the right.
3448  *
3449  * To keep individual deletions cheap, we need to allow gaps in the array.
3450  * This is implemented by marking array elements as valid or invalid using
3451  * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
3452  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
3453  * XID entry itself. This preserves the property that the XID entries are
3454  * sorted, so we can do binary searches easily. Periodically we compress
3455  * out the unused entries; that's much cheaper than having to compress the
3456  * array immediately on every deletion.
3457  *
3458  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
3459  * are those with indexes tail <= i < head; items outside this subscript range
3460  * have unspecified contents. When head reaches the end of the array, we
3461  * force compression of unused entries rather than wrapping around, since
3462  * allowing wraparound would greatly complicate the search logic. We maintain
3463  * an explicit tail pointer so that pruning of old XIDs can be done without
3464  * immediately moving the array contents. In most cases only a small fraction
3465  * of the array contains valid entries at any instant.
3466  *
3467  * Although only the startup process can ever change the KnownAssignedXids
3468  * data structure, we still need interlocking so that standby backends will
3469  * not observe invalid intermediate states. The convention is that backends
3470  * must hold shared ProcArrayLock to examine the array. To remove XIDs from
3471  * the array, the startup process must hold ProcArrayLock exclusively, for
3472  * the usual transactional reasons (compare commit/abort of a transaction
3473  * during normal running). Compressing unused entries out of the array
3474  * likewise requires exclusive lock. To add XIDs to the array, we just insert
3475  * them into slots to the right of the head pointer and then advance the head
3476  * pointer. This wouldn't require any lock at all, except that on machines
3477  * with weak memory ordering we need to be careful that other processors
3478  * see the array element changes before they see the head pointer change.
3479  * We handle this by using a spinlock to protect reads and writes of the
3480  * head/tail pointers. (We could dispense with the spinlock if we were to
3481  * create suitable memory access barrier primitives and use those instead.)
3482  * The spinlock must be taken to read or write the head/tail pointers unless
3483  * the caller holds ProcArrayLock exclusively.
3484  *
3485  * Algorithmic analysis:
3486  *
3487  * If we have a maximum of M slots, with N XIDs currently spread across
3488  * S elements then we have N <= S <= M always.
3489  *
3490  * * Adding a new XID is O(1) and needs little locking (unless compression
3491  * must happen)
3492  * * Compressing the array is O(S) and requires exclusive lock
3493  * * Removing an XID is O(logS) and requires exclusive lock
3494  * * Taking a snapshot is O(S) and requires shared lock
3495  * * Checking for an XID is O(logS) and requires shared lock
3496  *
3497  * In comparison, using a hash table for KnownAssignedXids would mean that
3498  * taking snapshots would be O(M). If we can maintain S << M then the
3499  * sorted array technique will deliver significantly faster snapshots.
3500  * If we try to keep S too small then we will spend too much time compressing,
3501  * so there is an optimal point for any workload mix. We use a heuristic to
3502  * decide when to compress the array, though trimming also helps reduce
3503  * frequency of compressing. The heuristic requires us to track the number of
3504  * currently valid XIDs in the array.
3505  */
3506 
3507 
3508 /*
3509  * Compress KnownAssignedXids by shifting valid data down to the start of the
3510  * array, removing any gaps.
3511  *
3512  * A compression step is forced if "force" is true, otherwise we do it
3513  * only if a heuristic indicates it's a good time to do it.
3514  *
3515  * Caller must hold ProcArrayLock in exclusive mode.
 *
 * On return after a compression, tailKnownAssignedXids is 0 and
 * headKnownAssignedXids equals the number of valid entries that were found
 * between the old tail and head. numKnownAssignedXids is not modified here.
 * Slots at or beyond the new head keep whatever they held before.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this listing; the body reads a "force" parameter.
3516  */
3517 static void
3519 {
3520  ProcArrayStruct *pArray = procArray;
3521  int head,
3522  tail;
3523  int compress_index;
3524  int i;
3525 
3526  /* no spinlock required since we hold ProcArrayLock exclusively */
3527  head = pArray->headKnownAssignedXids;
3528  tail = pArray->tailKnownAssignedXids;
3529 
3530  if (!force)
3531  {
3532  /*
3533  * If we can choose how much to compress, use a heuristic to avoid
3534  * compressing too often or not often enough.
3535  *
3536  * Heuristic is if we have a large enough current spread and less than
3537  * 50% of the elements are currently in use, then compress. This
3538  * should ensure we compress fairly infrequently. We could compress
3539  * less often though the virtual array would spread out more and
3540  * snapshots would become more expensive.
3541  */
3542  int nelements = head - tail;
3543 
 /*
  * Skip the compression unless the span covers at least
  * 4 * PROCARRAY_MAXPROCS slots and at most half of them are valid.
  */
3544  if (nelements < 4 * PROCARRAY_MAXPROCS ||
3545  nelements < 2 * pArray->numKnownAssignedXids)
3546  return;
3547  }
3548 
3549  /*
3550  * We compress the array by reading the valid values from tail to head,
3551  * re-aligning data to 0th element.
 *
 * The copy is done in place. compress_index starts at 0 and advances at
 * most once per value of i, so it never overtakes i and no entry is
 * overwritten before it has been read.
3552  */
3553  compress_index = 0;
3554  for (i = tail; i < head; i++)
3555  {
3556  if (KnownAssignedXidsValid[i])
3557  {
3558  KnownAssignedXids[compress_index] = KnownAssignedXids[i];
3559  KnownAssignedXidsValid[compress_index] = true;
3560  compress_index++;
3561  }
3562  }
3563 
 /* The valid entries now occupy slots [0, compress_index) */
3564  pArray->tailKnownAssignedXids = 0;
3565  pArray->headKnownAssignedXids = compress_index;
3566 }
3567 
3568 /*
3569  * Add xids into KnownAssignedXids at the head of the array.
3570  *
3571  * xids from from_xid to to_xid, inclusive, are added to the array.
3572  *
3573  * If exclusive_lock is true then caller already holds ProcArrayLock in
3574  * exclusive mode, so we need no extra locking here. Else caller holds no
3575  * lock, so we need to be sure we maintain sufficient interlocks against
3576  * concurrent readers. (Only the startup process ever calls this, so no need
3577  * to worry about concurrent writers.)
3578  */
3579 static void
3581  bool exclusive_lock)
3582 {
3583  ProcArrayStruct *pArray = procArray;
3584  TransactionId next_xid;
3585  int head,
3586  tail;
3587  int nxids;
3588  int i;
3589 
3590  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
3591 
3592  /*
3593  * Calculate how many array slots we'll need. Normally this is cheap; in
3594  * the unusual case where the XIDs cross the wrap point, we do it the hard
3595  * way.
3596  */
3597  if (to_xid >= from_xid)
3598  nxids = to_xid - from_xid + 1;
3599  else
3600  {
3601  nxids = 1;
3602  next_xid = from_xid;
3603  while (TransactionIdPrecedes(next_xid, to_xid))
3604  {
3605  nxids++;
3606  TransactionIdAdvance(next_xid);
3607  }
3608  }
3609 
3610  /*
3611  * Since only the startup process modifies the head/tail pointers, we
3612  * don't need a lock to read them here.
3613  */
3614  head = pArray->headKnownAssignedXids;
3615  tail = pArray->tailKnownAssignedXids;
3616 
3617  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
3618  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
3619 
3620  /*
3621  * Verify that insertions occur in TransactionId sequence. Note that even
3622  * if the last existing element is marked invalid, it must still have a
3623  * correctly sequenced XID value.
3624  */
3625  if (head > tail &&
3626  TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
3627  {
3629  elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
3630  }
3631 
3632  /*
3633  * If our xids won't fit in the remaining space, compress out free space
3634  */
3635  if (head + nxids > pArray->maxKnownAssignedXids)
3636  {
3637  /* must hold lock to compress */
3638  if (!exclusive_lock)
3639  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3640 
3642 
3643  head = pArray->headKnownAssignedXids;
3644  /* note: we no longer care about the tail pointer */
3645 
3646  if (!exclusive_lock)
3647  LWLockRelease(ProcArrayLock);
3648 
3649  /*
3650  * If it still won't fit then we're out of memory
3651  */
3652  if (head + nxids > pArray->maxKnownAssignedXids)
3653  elog(ERROR, "too many KnownAssignedXids");
3654  }
3655 
3656  /* Now we can insert the xids into the space starting at head */
3657  next_xid = from_xid;
3658  for (i = 0; i < nxids; i++)
3659  {
3660  KnownAssignedXids[head] = next_xid;
3661  KnownAssignedXidsValid[head] = true;
3662  TransactionIdAdvance(next_xid);
3663  head++;
3664  }
3665 
3666  /* Adjust count of number of valid entries */
3667  pArray->numKnownAssignedXids += nxids;
3668 
3669  /*
3670  * Now update the head pointer. We use a spinlock to protect this
3671  * pointer, not because the update is likely to be non-atomic, but to
3672  * ensure that other processors see the above array updates before they
3673  * see the head pointer change.
3674  *
3675  * If we're holding ProcArrayLock exclusively, there's no need to take the
3676  * spinlock.
3677  */
3678  if (exclusive_lock)
3679  pArray->headKnownAssignedXids = head;
3680  else
3681  {
3683  pArray->headKnownAssignedXids = head;
3685  }
3686 }
3687 
3688 /*
3689  * KnownAssignedXidsSearch
3690  *
3691  * Searches KnownAssignedXids for a specific xid and optionally removes it.
3692  * Returns true if it was found, false if not.
3693  *
3694  * Caller must hold ProcArrayLock in shared or exclusive mode.
3695  * Exclusive lock must be held for remove = true.
3696  */
3697 static bool
3699 {
3700  ProcArrayStruct *pArray = procArray;
3701  int first,
3702  last;
3703  int head;
3704  int tail;
3705  int result_index = -1;
3706 
3707  if (remove)
3708  {
3709  /* we hold ProcArrayLock exclusively, so no need for spinlock */
3710  tail = pArray->tailKnownAssignedXids;
3711  head = pArray->headKnownAssignedXids;
3712  }
3713  else
3714  {
3715  /* take spinlock to ensure we see up-to-date array contents */
3717  tail = pArray->tailKnownAssignedXids;
3718  head = pArray->headKnownAssignedXids;
3720  }
3721 
3722  /*
3723  * Standard binary search. Note we can ignore the KnownAssignedXidsValid
3724  * array here, since even invalid entries will contain sorted XIDs.
3725  */
3726  first = tail;
3727  last = head - 1;
3728  while (first <= last)
3729  {
3730  int mid_index;
3731  TransactionId mid_xid;
3732 
3733  mid_index = (first + last) / 2;
3734  mid_xid = KnownAssignedXids[mid_index];
3735 
3736  if (xid == mid_xid)
3737  {
3738  result_index = mid_index;
3739  break;
3740  }
3741  else if (TransactionIdPrecedes(xid, mid_xid))
3742  last = mid_index - 1;
3743  else
3744  first = mid_index + 1;
3745  }
3746 
3747  if (result_index < 0)
3748  return false; /* not in array */
3749 
3750  if (!KnownAssignedXidsValid[result_index])
3751  return false; /* in array, but invalid */
3752 
3753  if (remove)
3754  {
3755  KnownAssignedXidsValid[result_index] = false;
3756 
3757  pArray->numKnownAssignedXids--;
3758  Assert(pArray->numKnownAssignedXids >= 0);
3759 
3760  /*
3761  * If we're removing the tail element then advance tail pointer over
3762  * any invalid elements. This will speed future searches.
3763  */
3764  if (result_index == tail)
3765  {
3766  tail++;
3767  while (tail < head && !KnownAssignedXidsValid[tail])
3768  tail++;
3769  if (tail >= head)
3770  {
3771  /* Array is empty, so we can reset both pointers */
3772  pArray->headKnownAssignedXids = 0;
3773  pArray->tailKnownAssignedXids = 0;
3774  }
3775  else
3776  {
3777  pArray->tailKnownAssignedXids = tail;
3778  }
3779  }
3780  }
3781 
3782  return true;
3783 }
3784 
3785 /*
3786  * Is the specified XID present in KnownAssignedXids[]?
3787  *
3788  * Caller must hold ProcArrayLock in shared or exclusive mode.
3789  */
3790 static bool
3792 {
3794 
3795  return KnownAssignedXidsSearch(xid, false);
3796 }
3797 
3798 /*
3799  * Remove the specified XID from KnownAssignedXids[].
3800  *
3801  * Caller must hold ProcArrayLock in exclusive mode.
3802  */
3803 static void
3805 {
3807 
3808  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
3809 
3810  /*
3811  * Note: we cannot consider it an error to remove an XID that's not
3812  * present. We intentionally remove subxact IDs while processing
3813  * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
3814  * removed again when the top-level xact commits or aborts.
3815  *
3816  * It might be possible to track such XIDs to distinguish this case from
3817  * actual errors, but it would be complicated and probably not worth it.
3818  * So, just ignore the search result.
3819  */
3820  (void) KnownAssignedXidsSearch(xid, true);
3821 }
3822 
3823 /*
3824  * KnownAssignedXidsRemoveTree
3825  * Remove xid (if it's not InvalidTransactionId) and all the subxids.
3826  *
3827  * Caller must hold ProcArrayLock in exclusive mode.
3828  */
3829 static void
3831  TransactionId *subxids)
3832 {
3833  int i;
3834 
3835  if (TransactionIdIsValid(xid))
3837 
3838  for (i = 0; i < nsubxids; i++)
3839  KnownAssignedXidsRemove(subxids[i]);
3840 
3841  /* Opportunistically compress the array */
3843 }
3844 
3845 /*
3846  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
3847  * then clear the whole table.
3848  *
3849  * Caller must hold ProcArrayLock in exclusive mode.
3850  */
3851 static void
3853 {
3854  ProcArrayStruct *pArray = procArray;
3855  int count = 0;
3856  int head,
3857  tail,
3858  i;
3859 
3860  if (!TransactionIdIsValid(removeXid))
3861  {
3862  elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
3863  pArray->numKnownAssignedXids = 0;
3864  pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
3865  return;
3866  }
3867 
3868  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
3869 
3870  /*
3871  * Mark entries invalid starting at the tail. Since array is sorted, we
3872  * can stop as soon as we reach an entry >= removeXid.
3873  */
3874  tail = pArray->tailKnownAssignedXids;
3875  head = pArray->headKnownAssignedXids;
3876 
3877  for (i = tail; i < head; i++)
3878  {
3879  if (KnownAssignedXidsValid[i])
3880  {
3881  TransactionId knownXid = KnownAssignedXids[i];
3882 
3883  if (TransactionIdFollowsOrEquals(knownXid, removeXid))
3884  break;
3885 
3886  if (!StandbyTransactionIdIsPrepared(knownXid))
3887  {
3888  KnownAssignedXidsValid[i] = false;
3889  count++;
3890  }
3891  }
3892  }
3893 
3894  pArray->numKnownAssignedXids -= count;
3895  Assert(pArray->numKnownAssignedXids >= 0);
3896 
3897  /*
3898  * Advance the tail pointer if we've marked the tail item invalid.
3899  */
3900  for (i = tail; i < head; i++)
3901  {
3902  if (KnownAssignedXidsValid[i])
3903  break;
3904  }
3905  if (i >= head)
3906  {
3907  /* Array is empty, so we can reset both pointers */
3908  pArray->headKnownAssignedXids = 0;
3909  pArray->tailKnownAssignedXids = 0;
3910  }
3911  else
3912  {
3913  pArray->tailKnownAssignedXids = i;
3914  }
3915 
3916  /* Opportunistically compress the array */
3918 }
3919 
3920 /*
3921  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
3922  * We filter out anything >= xmax.
3923  *
3924  * Returns the number of XIDs stored into xarray[]. Caller is responsible
3925  * that array is large enough.
3926  *
3927  * Caller must hold ProcArrayLock in (at least) shared mode.
3928  */
3929 static int
3931 {
3933 
3934  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
3935 }
3936 
3937 /*
3938  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
3939  * we reduce *xmin to the lowest xid value seen if not already lower.
3940  *
3941  * Caller must hold ProcArrayLock in (at least) shared mode.
3942  */
3943 static int
3945  TransactionId xmax)
3946 {
3947  int count = 0;
3948  int head,
3949  tail;
3950  int i;
3951 
3952  /*
3953  * Fetch head just once, since it may change while we loop. We can stop
3954  * once we reach the initially seen head, since we are certain that an xid
3955  * cannot enter and then leave the array while we hold ProcArrayLock. We
3956  * might miss newly-added xids, but they should be >= xmax so irrelevant
3957  * anyway.
3958  *
3959  * Must take spinlock to ensure we see up-to-date array contents.
3960  */
3962  tail = procArray->tailKnownAssignedXids;
3963  head = procArray->headKnownAssignedXids;
3965 
3966  for (i = tail; i < head; i++)
3967  {
3968  /* Skip any gaps in the array */
3969  if (KnownAssignedXidsValid[i])
3970  {
3971  TransactionId knownXid = KnownAssignedXids[i];
3972 
3973  /*
3974  * Update xmin if required. Only the first XID need be checked,
3975  * since the array is sorted.
3976  */
3977  if (count == 0 &&
3978  TransactionIdPrecedes(knownXid, *xmin))
3979  *xmin = knownXid;
3980 
3981  /*
3982  * Filter out anything >= xmax, again relying on sorted property
3983  * of array.
3984  */
3985  if (TransactionIdIsValid(xmax) &&
3986  TransactionIdFollowsOrEquals(knownXid, xmax))
3987  break;
3988 
3989  /* Add knownXid into output array */
3990  xarray[count++] = knownXid;
3991  }
3992  }
3993 
3994  return count;
3995 }
3996 
3997 /*
3998  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
3999  * if nothing there.
4000  */
4001 static TransactionId
4003 {
4004  int head,
4005  tail;
4006  int i;
4007 
4008  /*
4009  * Fetch head just once, since it may change while we loop.
4010  */
4012  tail = procArray->tailKnownAssignedXids;
4013  head = procArray->headKnownAssignedXids;
4015 
4016  for (i = tail; i < head; i++)
4017  {
4018  /* Skip any gaps in the array */
4019  if (KnownAssignedXidsValid[i])
4020  return KnownAssignedXids[i];
4021  }
4022 
4023  return InvalidTransactionId;
4024 }
4025 
4026 /*
4027  * Display KnownAssignedXids to provide debug trail
4028  *
4029  * Currently this is only called within startup process, so we need no
4030  * special locking.
4031  *
4032  * Note this is pretty expensive, and much of the expense will be incurred
4033  * even if the elog message will get discarded. It's not currently called
4034  * in any performance-critical places, however, so no need to be tenser.
4035  */
4036 static void
4038 {
4039  ProcArrayStruct *pArray = procArray;
4041  int head,
4042  tail,
4043  i;
4044  int nxids = 0;
4045 
4046  tail = pArray->tailKnownAssignedXids;
4047  head = pArray->headKnownAssignedXids;
4048 
4049  initStringInfo(&buf);
4050 
4051  for (i = tail; i < head; i++)
4052  {
4053  if (KnownAssignedXidsValid[i])
4054  {
4055  nxids++;
4056  appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
4057  }
4058  }
4059 
4060  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
4061  nxids,
4062  pArray->numKnownAssignedXids,
4063  pArray->tailKnownAssignedXids,
4064  pArray->headKnownAssignedXids,
4065  buf.data);
4066 
4067  pfree(buf.data);
4068 }
4069 
4070 /*
4071  * KnownAssignedXidsReset
4072  * Resets KnownAssignedXids to be empty
 *
 * Acquires ProcArrayLock in exclusive mode itself, so the caller must not
 * already hold it. Sets the valid-entry count and both the tail and head
 * indexes to zero; the array contents are not touched.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this listing.
4073  */
4074 static void
4076 {
4077  ProcArrayStruct *pArray = procArray;
4078 
4079  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4080 
4081  pArray->numKnownAssignedXids = 0;
4082  pArray->tailKnownAssignedXids = 0;
4083  pArray->headKnownAssignedXids = 0;
4084 
4085  LWLockRelease(ProcArrayLock);
4086 }
#define TransactionIdAdvance(dest)
Definition: transam.h:89
int slock_t
Definition: s_lock.h:934
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:663
#define NIL
Definition: pg_list.h:65
#define XidCacheRemove(i)
Definition: procarray.c:3129
static TransactionId latestObservedXid
Definition: procarray.c:112
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:2490
TransactionId oldestRunningXid
Definition: standby.h:76
bool procArrayGroupMember
Definition: proc.h:175
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:2653
#define PROCARRAY_MAXPROCS
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2176
int CountDBBackends(Oid databaseid)
Definition: procarray.c:2755
#define DEBUG1
Definition: elog.h:25
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:4037
#define GET_VXID_FROM_PGPROC(vxid, proc)
Definition: lock.h:79
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:57
BackendId backendId
Definition: proc.h:120
uint32 TransactionId
Definition: c.h:513
bool copied
Definition: snapshot.h:185
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:262
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
Definition: proc.h:229
#define DEBUG3
Definition: elog.h:23
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:854
Oid GetUserId(void)
Definition: miscinit.c:448
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1928
TransactionId xmin
Definition: proc.h:235
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:2362
#define FullTransactionIdIsValid(x)
Definition: transam.h:55
PGXACT * allPgXact
Definition: proc.h:254
PGPROC * MyProc
Definition: proc.c:67
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:986
TransactionId xid
Definition: proc.h:231
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:365
int vacuum_defer_cleanup_age
Definition: standby.c:39
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:67
#define SpinLockInit(lock)
Definition: spin.h:60
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4892
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:2578
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:96
XLogRecPtr lsn
Definition: snapshot.h:203
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2272
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:485
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:276
#define xc_by_my_xact_inc()
Definition: procarray.c:149
Oid roleId
Definition: proc.h:122
int errcode(int sqlerrcode)
Definition: elog.c:610
TransactionId RecentXmin
Definition: snapmgr.c:167
slock_t known_assigned_xids_lck
Definition: procarray.c:82
bool superuser(void)
Definition: superuser.c:46
PROC_HDR * ProcGlobal
Definition: proc.c:80
bool suboverflowed
Definition: snapshot.h:182
TransactionId * xids
Definition: standby.h:79
#define kill(pid, sig)
Definition: win32_port.h:426
FullTransactionId nextFullXid
Definition: transam.h:178
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:8071
#define TransactionIdRetreat(dest)
Definition: transam.h:106
LocalTransactionId localTransactionId
Definition: lock.h:65
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:397
#define xc_by_child_xid_inc()
Definition: procarray.c:152
bool TransactionIdIsKnownCompleted(TransactionId transactionId)
Definition: transam.c:238
#define DEBUG4
Definition: elog.h:22
#define fprintf
Definition: port.h:197
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:3378
#define MAXAUTOVACPIDS
uint32 regd_count
Definition: snapshot.h:199
#define OidIsValid(objectId)
Definition: c.h:644
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3114
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:3403
PGXACT * MyPgXact
Definition: proc.c:68
int trace_recovery(int trace_level)
Definition: elog.c:3505
uint8 vacuumFlags
Definition: proc.h:240
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:62
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TransactionId TransactionXmin
Definition: snapmgr.c:166
TransactionId latestCompletedXid
Definition: standby.h:77
Definition: type.h:89
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1812
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:292
bool isBackgroundWorker
Definition: proc.h:127
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:73
#define xc_by_recent_xmin_inc()
Definition: procarray.c:147
#define xc_by_known_xact_inc()
Definition: procarray.c:148
bool MinimumActiveBackends(int min)
Definition: procarray.c:2701
static void KnownAssignedXidsRemovePreceding(TransactionId xid)
Definition: procarray.c:3852
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:2385
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:250
void pfree(void *pointer)
Definition: mcxt.c:1056
#define PROC_IN_VACUUM
Definition: proc.h:54
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
int CountDBConnections(Oid databaseid)
Definition: procarray.c:2785
#define FirstNormalTransactionId
Definition: transam.h:34
#define ERROR
Definition: elog.h:43
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3145
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1720
bool delayChkpt
Definition: proc.h:152
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:600
#define lfirst_int(lc)
Definition: pg_list.h:191
#define PROCARRAY_SLOTS_XMIN
Definition: procarray.h:37
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:392
#define PROCARRAY_PROC_FLAGS_MASK
Definition: procarray.h:44
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:326
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:334
#define FATAL
Definition: elog.h:52
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:11486
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:2816
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define xc_by_main_xid_inc()
Definition: procarray.c:151
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1380
static char * buf
Definition: pg_test_fsync.c:67
bool recoveryConflictPending
Definition: proc.h:134
#define xc_by_known_assigned_inc()
Definition: procarray.c:153
TransactionId RecentGlobalXmin
Definition: snapmgr.c:168
uint8 nxids
Definition: proc.h:243
VariableCache ShmemVariableCache
Definition: varsup.c:34
int maxKnownAssignedXids
Definition: procarray.c:78
#define InvalidTransactionId
Definition: transam.h:31
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1375
static PGPROC * allProcs
Definition: procarray.c:104
Oid databaseId
Definition: proc.h:121
unsigned int uint32
Definition: c.h:367
TransactionId xmax
Definition: snapshot.h:158
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1381
TransactionId xmin
Definition: snapshot.h:157
static void KnownAssignedXidsReset(void)
Definition: procarray.c:4075
LOCK * waitLock
Definition: proc.h:146
int numKnownAssignedXids
Definition: procarray.c:79
TransactionId RecentGlobalDataXmin
Definition: snapmgr.c:169
static bool * KnownAssignedXidsValid
Definition: procarray.c:111
struct XidCache subxids
Definition: proc.h:171
TransactionId lastOverflowedXid
Definition: procarray.c:91
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
#define xc_by_latest_xid_inc()
Definition: procarray.c:150
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
#define INVALID_PGPROCNO
Definition: proc.h:77
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:168
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:3830
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:177
List * lappend_int(List *list, int datum)
Definition: list.c:339
Definition: proc.h:249
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
static ProcArrayStruct * procArray
Definition: procarray.c:102
#define WARNING
Definition: elog.h:40
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:70
#define SpinLockRelease(lock)
Definition: spin.h:64
TransactionId replication_slot_xmin
Definition: procarray.c:94
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:2422
#define InvalidBackendId
Definition: backendid.h:23
static PGXACT * allPgXact
Definition: procarray.c:105
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:619
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:3930
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
Oid MyDatabaseId
Definition: globals.c:85
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:4002
bool overflowed
Definition: proc.h:241
static void ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact, TransactionId latestXid)
Definition: procarray.c:451
#define InvalidOid
Definition: postgres_ext.h:36
CommandId curcid
Definition: snapshot.h:187
#define ereport(elevel,...)
Definition: elog.h:144
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1451
TransactionId GetOldestXmin(Relation rel, int flags)
Definition: procarray.c:1305
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:99
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:40
#define TOTAL_MAX_CACHED_SUBXIDS
#define Assert(condition)
Definition: c.h:738
static TransactionId * KnownAssignedXids
Definition: procarray.c:110
BackendId backendId
Definition: lock.h:64
#define pg_read_barrier()
Definition: atomics.h:158
void CreateSharedProcArray(void)
Definition: procarray.c:225
bool takenDuringRecovery
Definition: snapshot.h:184
size_t Size
Definition: c.h:466
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1357
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:1504
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1049
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1208
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:3944
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:3580
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:1870
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:112
#define xc_no_overflow_inc()
Definition: procarray.c:154
bool EnableHotStandby
Definition: xlog.c:96
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
static void KnownAssignedXidsCompress(bool force)
Definition: procarray.c:3518
int CountUserBackends(Oid roleid)
Definition: procarray.c:2856
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:3791
int pgprocno
Definition: proc.h:117
TransactionId nextXid
Definition: standby.h:75
bool TransactionIdIsActive(TransactionId xid)
Definition: procarray.c:1206
#define xc_slow_answer_inc()
Definition: procarray.c:155
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:266
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:632
uint32 xcnt
Definition: snapshot.h:169
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
struct ProcArrayStruct ProcArrayStruct
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:3698
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:3804
#define elog(elevel,...)
Definition: elog.h:214
int old_snapshot_threshold
Definition: snapmgr.c:75
#define InvalidLocalTransactionId
Definition: lock.h:68
int i
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:3415
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2110
bool IsBackendPid(int pid)
Definition: procarray.c:2457
#define pg_write_barrier()
Definition: atomics.h:159
ProcSignalReason
Definition: procsignal.h:30
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3092
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:1462
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:1936
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:902
TimestampTz whenTaken
Definition: snapshot.h:202
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:2984
PGPROC * allProcs
Definition: proc.h:252
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:2906
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:746
#define qsort(a, b, c, d)
Definition: port.h:479
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1882
PGSemaphore sem
Definition: proc.h:108
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2316
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:3307
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:74
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:1795
int tailKnownAssignedXids
Definition: procarray.c:80
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:119
Definition: proc.h:102
Definition: pg_list.h:50
int pid
Definition: proc.h:116
HotStandbyState standbyState
Definition: xlog.c:207
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:276
#define PROC_IS_AUTOVACUUM
Definition: proc.h:53
#define offsetof(type, field)
Definition: c.h:661
TransactionId procArrayGroupMemberXid
Definition: proc.h:183
Size ProcArrayShmemSize(void)
Definition: procarray.c:183
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:773
TransactionId * subxip
Definition: snapshot.h:180
uint32 active_count
Definition: snapshot.h:198
int headKnownAssignedXids
Definition: procarray.c:81
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:139
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
int32 subxcnt
Definition: snapshot.h:181
LocalTransactionId lxid
Definition: proc.h:113
TransactionId latestCompletedXid
Definition: transam.h:196