PostgreSQL Source Code  git master
procarray.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  * POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of PGPROC substructures, as well as associated
8  * arrays in ProcGlobal, for all active backends. Although there are several
9  * uses for this, the principal one is as a means of determining the set of
10  * currently running transactions.
11  *
12  * Because of various subtle race conditions it is critical that a backend
13  * hold the correct locks while setting or clearing its xid (in
14  * ProcGlobal->xids[]/MyProc->xid). See notes in
15  * src/backend/access/transam/README.
16  *
17  * The process arrays now also include structures representing prepared
18  * transactions. The xid and subxids fields of these are valid, as are the
19  * myProcLocks lists. They can be distinguished from regular backend PGPROCs
20  * at need by checking for pid == 0.
21  *
22  * During hot standby, we also keep a list of XIDs representing transactions
23  * that are known to be running on the primary (or more precisely, were running
24  * as of the current point in the WAL stream). This list is kept in the
25  * KnownAssignedXids array, and is updated by watching the sequence of
26  * arriving XIDs. This is necessary because if we leave those XIDs out of
27  * snapshots taken for standby queries, then they will appear to be already
28  * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
29  * array represents standby processes, which by definition are not running
30  * transactions that have XIDs.
31  *
32  * It is perhaps possible for a backend on the primary to terminate without
33  * writing an abort record for its transaction. While that shouldn't really
34  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
35  * ourselves by pruning the array when a valid list of running XIDs arrives.
36  *
37  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
38  * Portions Copyright (c) 1994, Regents of the University of California
39  *
40  *
41  * IDENTIFICATION
42  * src/backend/storage/ipc/procarray.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <signal.h>
49 
50 #include "access/clog.h"
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/twophase.h"
54 #include "access/xact.h"
55 #include "access/xlogutils.h"
56 #include "catalog/catalog.h"
57 #include "catalog/pg_authid.h"
58 #include "commands/dbcommands.h"
59 #include "miscadmin.h"
60 #include "pgstat.h"
61 #include "storage/proc.h"
62 #include "storage/procarray.h"
63 #include "storage/spin.h"
64 #include "utils/acl.h"
65 #include "utils/builtins.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 
69 #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
70 
71 /* Our shared memory area */
72 typedef struct ProcArrayStruct
73 {
74  int numProcs; /* number of valid procs entries */
75  int maxProcs; /* allocated size of procs array */
76 
77  /*
78  * Known assigned XIDs handling
79  */
80  int maxKnownAssignedXids; /* allocated size of array */
81  int numKnownAssignedXids; /* current # of valid entries */
82  int tailKnownAssignedXids; /* index of oldest valid element */
83  int headKnownAssignedXids; /* index of newest element, + 1 */
84  slock_t known_assigned_xids_lck; /* protects head/tail pointers */
85 
86  /*
87  * Highest subxid that has been removed from KnownAssignedXids array to
88  * prevent overflow; or InvalidTransactionId if none. We track this for
89  * similar reasons to tracking overflowing cached subxids in PGPROC
90  * entries. Must hold exclusive ProcArrayLock to change this, and shared
91  * lock to read it.
92  */
94 
95  /* oldest xmin of any replication slot */
97  /* oldest catalog xmin of any replication slot */
99 
100  /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
103 
104 /*
105  * State for the GlobalVisTest* family of functions. Those functions can
106  * e.g. be used to decide if a deleted row can be removed without violating
107  * MVCC semantics: If the deleted row's xmax is not considered to be running
108  * by anyone, the row can be removed.
109  *
110  * To avoid slowing down GetSnapshotData(), we don't calculate a precise
111  * cutoff XID while building a snapshot (looking at the frequently changing
112  * xmins scales badly). Instead we compute two boundaries while building the
113  * snapshot:
114  *
115  * 1) definitely_needed, indicating that rows deleted by XIDs >=
116  * definitely_needed are definitely still visible.
117  *
118  * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
119  * definitely be removed.
120  *
121  * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
122  * && XID < definitely_needed), the boundaries can be recomputed (using
123  * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
124  * maintaining an accurate value all the time.
125  *
126  * As it is not cheap to compute accurate boundaries, we limit the number of
127  * times that happens in short succession. See GlobalVisTestShouldUpdate().
128  *
129  *
130  * There are four backend lifetime instances of this struct, optimized for
131  * different types of relations. As e.g. a normal user defined table in one
132  * database is inaccessible to backends connected to another database, a test
133  * specific to a relation can be more aggressive than a test for a shared
134  * relation. Currently we track four different states:
135  *
136  * 1) GlobalVisSharedRels, which only considers an XID's
137  * effects visible-to-everyone if neither snapshots in any database, nor a
138  * replication slot's xmin, nor a replication slot's catalog_xmin might
139  * still consider XID as running.
140  *
141  * 2) GlobalVisCatalogRels, which only considers an XID's
142  * effects visible-to-everyone if neither snapshots in the current
143  * database, nor a replication slot's xmin, nor a replication slot's
144  * catalog_xmin might still consider XID as running.
145  *
146  * I.e. the difference to GlobalVisSharedRels is that
147  * snapshots in other databases are ignored.
148  *
149  * 3) GlobalVisDataRels, which only considers an XID's
150  * effects visible-to-everyone if neither snapshots in the current
151  * database, nor a replication slot's xmin consider XID as running.
152  *
153  * I.e. the difference to GlobalVisCatalogRels is that
154  * replication slot's catalog_xmin is not taken into account.
155  *
156  * 4) GlobalVisTempRels, which only considers the current session, as temp
157  * tables are not visible to other sessions.
158  *
159  * GlobalVisTestFor(relation) returns the appropriate state
160  * for the relation.
161  *
162  * The boundaries are FullTransactionIds instead of TransactionIds to avoid
163  * wraparound dangers. Otherwise there would, e.g., be no procarray state
164  * preventing maybe_needed from becoming old enough to wrap around after the
165  * GetSnapshotData() call.
166  *
167  * The typedef is in the header.
168  */
170 {
171  /* XIDs >= are considered running by some backend */
173 
174  /* XIDs < are not considered to be running by any backend */
176 };
177 
178 /*
179  * Result of ComputeXidHorizons().
180  */
182 {
183  /*
184  * The value of ShmemVariableCache->latestCompletedXid when
185  * ComputeXidHorizons() held ProcArrayLock.
186  */
188 
189  /*
190  * The same for procArray->replication_slot_xmin and
191  * procArray->replication_slot_catalog_xmin.
192  */
195 
196  /*
197  * Oldest xid that any backend might still consider running. This needs to
198  * include processes running VACUUM, in contrast to the normal visibility
199  * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
200  * determining visibility, but doesn't care about rows above its xmin
201  * being removed.
202  *
203  * This likely should only be needed to determine whether pg_subtrans can
204  * be truncated. It currently includes the effects of replication slots,
205  * for historical reasons. But that could likely be changed.
206  */
208 
209  /*
210  * Oldest xid for which deleted tuples need to be retained in shared
211  * tables.
212  *
213  * This includes the effects of replication slots. If that's not desired,
214  * look at shared_oldest_nonremovable_raw.
215  */
217 
218  /*
219  * Oldest xid that may be necessary to retain in shared tables. This is
220  * the same as shared_oldest_nonremovable, except that it is not affected by
221  * replication slot's catalog_xmin.
222  *
223  * This is mainly useful to be able to send the catalog_xmin to upstream
224  * streaming replication servers via hot_standby_feedback, so they can
225  * apply the limit only when accessing catalog tables.
226  */
228 
229  /*
230  * Oldest xid for which deleted tuples need to be retained in non-shared
231  * catalog tables.
232  */
234 
235  /*
236  * Oldest xid for which deleted tuples need to be retained in normal user
237  * defined tables.
238  */
240 
241  /*
242  * Oldest xid for which deleted tuples need to be retained in this
243  * session's temporary tables.
244  */
246 
248 
249 /*
250  * Return value for GlobalVisHorizonKindForRel().
251  */
253 {
259 
260 
262 
263 static PGPROC *allProcs;
264 
265 /*
266  * Bookkeeping for tracking emulated transactions in recovery
267  */
271 
272 /*
273  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
274  * the highest xid that might still be running that we don't have in
275  * KnownAssignedXids.
276  */
278 
279 /*
280  * State for visibility checks on different types of relations. See struct
281  * GlobalVisState for details. As shared, catalog, normal and temporary
282  * relations can have different horizons, one such state exists for each.
283  */
288 
289 /*
290  * This backend's RecentXmin at the last time the accurate xmin horizon was
291  * recomputed, or InvalidTransactionId if it has not. Used to limit how many
292  * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
293  */
295 
296 #ifdef XIDCACHE_DEBUG
297 
298 /* counters for XidCache measurement; each is bumped via its xc_*_inc() macro */
299 static long xc_by_recent_xmin = 0;
300 static long xc_by_known_xact = 0;
301 static long xc_by_my_xact = 0;
302 static long xc_by_latest_xid = 0;
303 static long xc_by_main_xid = 0;
304 static long xc_by_child_xid = 0;
305 static long xc_by_known_assigned = 0;
306 static long xc_no_overflow = 0;
307 static long xc_slow_answer = 0;
308 /* XIDCACHE_DEBUG build: each macro post-increments the matching counter */
309 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
310 #define xc_by_known_xact_inc() (xc_by_known_xact++)
311 #define xc_by_my_xact_inc() (xc_by_my_xact++)
312 #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
313 #define xc_by_main_xid_inc() (xc_by_main_xid++)
314 #define xc_by_child_xid_inc() (xc_by_child_xid++)
315 #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
316 #define xc_no_overflow_inc() (xc_no_overflow++)
317 #define xc_slow_answer_inc() (xc_slow_answer++)
318 
319 static void DisplayXidCache(void);
320 #else /* !XIDCACHE_DEBUG */
321 /* without XIDCACHE_DEBUG the same macros expand to a no-op expression */
322 #define xc_by_recent_xmin_inc() ((void) 0)
323 #define xc_by_known_xact_inc() ((void) 0)
324 #define xc_by_my_xact_inc() ((void) 0)
325 #define xc_by_latest_xid_inc() ((void) 0)
326 #define xc_by_main_xid_inc() ((void) 0)
327 #define xc_by_child_xid_inc() ((void) 0)
328 #define xc_by_known_assigned_inc() ((void) 0)
329 #define xc_no_overflow_inc() ((void) 0)
330 #define xc_slow_answer_inc() ((void) 0)
331 #endif /* XIDCACHE_DEBUG */
332 
333 /* Primitives for KnownAssignedXids array handling for standby */
334 static void KnownAssignedXidsCompress(bool force);
335 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
336  bool exclusive_lock);
337 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
338 static bool KnownAssignedXidExists(TransactionId xid);
339 static void KnownAssignedXidsRemove(TransactionId xid);
340 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
341  TransactionId *subxids);
343 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
345  TransactionId *xmin,
346  TransactionId xmax);
348 static void KnownAssignedXidsDisplay(int trace_level);
349 static void KnownAssignedXidsReset(void);
350 static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
351 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
352 static void MaintainLatestCompletedXid(TransactionId latestXid);
354 
356  TransactionId xid);
357 static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
358 
359 /*
360  * Report shared-memory space needed by CreateSharedProcArray.
361  */
362 Size
364 {
365  Size size;
366 
367  /* Size of the ProcArray structure itself */
368 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
369 
370  size = offsetof(ProcArrayStruct, pgprocnos);
371  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
372 
373  /*
374  * During Hot Standby processing we have a data structure called
375  * KnownAssignedXids, created in shared memory. Local data structures are
376  * also created in various backends during GetSnapshotData(),
377  * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
378  * main structures created in those functions must be identically sized,
379  * since we may at times copy the whole of the data structures around. We
380  * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
381  *
382  * Ideally we'd only create this structure if we were actually doing hot
383  * standby in the current run, but we don't know that yet at the time
384  * shared memory is being set up.
385  */
386 #define TOTAL_MAX_CACHED_SUBXIDS \
387  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
388 
389  if (EnableHotStandby)
390  {
391  size = add_size(size,
392  mul_size(sizeof(TransactionId),
394  size = add_size(size,
395  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
396  }
397 
398  return size;
399 }
400 
401 /*
402  * Initialize the shared PGPROC array during postmaster startup.
403  */
404 void
406 {
407  bool found;
408 
409  /* Create or attach to the ProcArray shared structure */
411  ShmemInitStruct("Proc Array",
412  add_size(offsetof(ProcArrayStruct, pgprocnos),
413  mul_size(sizeof(int),
415  &found);
416 
417  if (!found)
418  {
419  /*
420  * We're the first - initialize.
421  */
422  procArray->numProcs = 0;
433  }
434 
436 
437  /* Create or attach to the KnownAssignedXids arrays too, if needed */
438  if (EnableHotStandby)
439  {
441  ShmemInitStruct("KnownAssignedXids",
442  mul_size(sizeof(TransactionId),
444  &found);
445  KnownAssignedXidsValid = (bool *)
446  ShmemInitStruct("KnownAssignedXidsValid",
447  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
448  &found);
449  }
450 }
451 
452 /*
453  * Add the specified PGPROC to the shared array.
454  */
455 void
457 {
458  ProcArrayStruct *arrayP = procArray;
459  int index;
460  int movecount;
461 
462  /* See ProcGlobal comment explaining why both locks are held */
463  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
464  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
465 
466  if (arrayP->numProcs >= arrayP->maxProcs)
467  {
468  /*
469  * Oops, no room. (This really shouldn't happen, since there is a
470  * fixed supply of PGPROC structs too, and so we should have failed
471  * earlier.)
472  */
473  ereport(FATAL,
474  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
475  errmsg("sorry, too many clients already")));
476  }
477 
478  /*
479  * Keep the procs array sorted by (PGPROC *) so that we can utilize
480  * locality of references much better. This is useful while traversing the
481  * ProcArray because there is an increased likelihood of finding the next
482  * PGPROC structure in the cache.
483  *
484  * Since the occurrence of adding/removing a proc is much lower than the
485  * access to the ProcArray itself, the overhead should be marginal.
486  */
487  for (index = 0; index < arrayP->numProcs; index++)
488  {
489  int procno PG_USED_FOR_ASSERTS_ONLY = arrayP->pgprocnos[index];
490 
491  Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
492  Assert(allProcs[procno].pgxactoff == index);
493 
494  /* If we have found our right position in the array, break */
495  if (arrayP->pgprocnos[index] > proc->pgprocno)
496  break;
497  }
498 
499  movecount = arrayP->numProcs - index;
500  memmove(&arrayP->pgprocnos[index + 1],
501  &arrayP->pgprocnos[index],
502  movecount * sizeof(*arrayP->pgprocnos));
503  memmove(&ProcGlobal->xids[index + 1],
504  &ProcGlobal->xids[index],
505  movecount * sizeof(*ProcGlobal->xids));
506  memmove(&ProcGlobal->subxidStates[index + 1],
508  movecount * sizeof(*ProcGlobal->subxidStates));
509  memmove(&ProcGlobal->statusFlags[index + 1],
511  movecount * sizeof(*ProcGlobal->statusFlags));
512 
513  arrayP->pgprocnos[index] = proc->pgprocno;
514  proc->pgxactoff = index;
515  ProcGlobal->xids[index] = proc->xid;
518 
519  arrayP->numProcs++;
520 
521  /* adjust pgxactoff for all following PGPROCs */
522  index++;
523  for (; index < arrayP->numProcs; index++)
524  {
525  int procno = arrayP->pgprocnos[index];
526 
527  Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
528  Assert(allProcs[procno].pgxactoff == index - 1);
529 
530  allProcs[procno].pgxactoff = index;
531  }
532 
533  /*
534  * Release in reversed acquisition order, to reduce frequency of having to
535  * wait for XidGenLock while holding ProcArrayLock.
536  */
537  LWLockRelease(XidGenLock);
538  LWLockRelease(ProcArrayLock);
539 }
540 
541 /*
542  * Remove the specified PGPROC from the shared array.
543  *
544  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
545  * array, and thus causing it to appear as "not running" anymore. In this
546  * case we must advance latestCompletedXid. (This is essentially the same
547  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
548  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
549  * twophase.c depends on the latter.)
550  */
551 void
553 {
554  ProcArrayStruct *arrayP = procArray;
555  int myoff;
556  int movecount;
557 
558 #ifdef XIDCACHE_DEBUG
559  /* dump stats at backend shutdown, but not prepared-xact end */
560  if (proc->pid != 0)
561  DisplayXidCache();
562 #endif
563 
564  /* See ProcGlobal comment explaining why both locks are held */
565  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
566  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
567 
568  myoff = proc->pgxactoff;
569 
570  Assert(myoff >= 0 && myoff < arrayP->numProcs);
571  Assert(ProcGlobal->allProcs[arrayP->pgprocnos[myoff]].pgxactoff == myoff);
572 
573  if (TransactionIdIsValid(latestXid))
574  {
576 
577  /* Advance global latestCompletedXid while holding the lock */
578  MaintainLatestCompletedXid(latestXid);
579 
580  /* Same with xactCompletionCount */
582 
584  ProcGlobal->subxidStates[myoff].overflowed = false;
585  ProcGlobal->subxidStates[myoff].count = 0;
586  }
587  else
588  {
589  /* Shouldn't be trying to remove a live transaction here */
591  }
592 
594  Assert(ProcGlobal->subxidStates[myoff].count == 0);
595  Assert(ProcGlobal->subxidStates[myoff].overflowed == false);
596 
597  ProcGlobal->statusFlags[myoff] = 0;
598 
599  /* Keep the PGPROC array sorted. See notes above */
600  movecount = arrayP->numProcs - myoff - 1;
601  memmove(&arrayP->pgprocnos[myoff],
602  &arrayP->pgprocnos[myoff + 1],
603  movecount * sizeof(*arrayP->pgprocnos));
604  memmove(&ProcGlobal->xids[myoff],
605  &ProcGlobal->xids[myoff + 1],
606  movecount * sizeof(*ProcGlobal->xids));
607  memmove(&ProcGlobal->subxidStates[myoff],
608  &ProcGlobal->subxidStates[myoff + 1],
609  movecount * sizeof(*ProcGlobal->subxidStates));
610  memmove(&ProcGlobal->statusFlags[myoff],
611  &ProcGlobal->statusFlags[myoff + 1],
612  movecount * sizeof(*ProcGlobal->statusFlags));
613 
614  arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
615  arrayP->numProcs--;
616 
617  /*
618  * Adjust pgxactoff of following procs for removed PGPROC (note that
619  * numProcs already has been decremented).
620  */
621  for (int index = myoff; index < arrayP->numProcs; index++)
622  {
623  int procno = arrayP->pgprocnos[index];
624 
625  Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
626  Assert(allProcs[procno].pgxactoff - 1 == index);
627 
628  allProcs[procno].pgxactoff = index;
629  }
630 
631  /*
632  * Release in reversed acquisition order, to reduce frequency of having to
633  * wait for XidGenLock while holding ProcArrayLock.
634  */
635  LWLockRelease(XidGenLock);
636  LWLockRelease(ProcArrayLock);
637 }
638 
639 
640 /*
641  * ProcArrayEndTransaction -- mark a transaction as no longer running
642  *
643  * This is used interchangeably for commit and abort cases. The transaction
644  * commit/abort must already be reported to WAL and pg_xact.
645  *
646  * proc is currently always MyProc, but we pass it explicitly for flexibility.
647  * latestXid is the latest Xid among the transaction's main XID and
648  * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
649  * the caller to pass latestXid, instead of computing it from the PGPROC's
650  * contents, because the subxid information in the PGPROC might be
651  * incomplete.)
652  */
653 void
655 {
656  if (TransactionIdIsValid(latestXid))
657  {
658  /*
659  * We must lock ProcArrayLock while clearing our advertised XID, so
660  * that we do not exit the set of "running" transactions while someone
661  * else is taking a snapshot. See discussion in
662  * src/backend/access/transam/README.
663  */
665 
666  /*
667  * If we can immediately acquire ProcArrayLock, we clear our own XID
668  * and release the lock. If not, use group XID clearing to improve
669  * efficiency.
670  */
671  if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
672  {
673  ProcArrayEndTransactionInternal(proc, latestXid);
674  LWLockRelease(ProcArrayLock);
675  }
676  else
677  ProcArrayGroupClearXid(proc, latestXid);
678  }
679  else
680  {
681  /*
682  * If we have no XID, we don't need to lock, since we won't affect
683  * anyone else's calculation of a snapshot. We might change their
684  * estimate of global xmin, but that's OK.
685  */
687  Assert(proc->subxidStatus.count == 0);
689 
691  proc->xmin = InvalidTransactionId;
692  proc->delayChkpt = false; /* be sure this is cleared in abort */
693  proc->recoveryConflictPending = false;
694 
695  /* must be cleared with xid/xmin: */
696  /* avoid unnecessarily dirtying shared cachelines */
698  {
699  Assert(!LWLockHeldByMe(ProcArrayLock));
700  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
704  LWLockRelease(ProcArrayLock);
705  }
706  }
707 }
708 
709 /*
710  * Mark a write transaction as no longer running.
711  *
712  * We don't do any locking here; caller must handle that.
713  */
714 static inline void
716 {
717  int pgxactoff = proc->pgxactoff;
718 
719  /*
720  * Note: we need exclusive lock here because we're going to change other
721  * processes' PGPROC entries.
722  */
723  Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
725  Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
726 
727  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
728  proc->xid = InvalidTransactionId;
730  proc->xmin = InvalidTransactionId;
731  proc->delayChkpt = false; /* be sure this is cleared in abort */
732  proc->recoveryConflictPending = false;
733 
734  /* must be cleared with xid/xmin: */
735  /* avoid unnecessarily dirtying shared cachelines */
737  {
740  }
741 
742  /* Clear the subtransaction-XID cache too while holding the lock */
743  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
745  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
746  {
747  ProcGlobal->subxidStates[pgxactoff].count = 0;
748  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
749  proc->subxidStatus.count = 0;
750  proc->subxidStatus.overflowed = false;
751  }
752 
753  /* Also advance global latestCompletedXid while holding the lock */
754  MaintainLatestCompletedXid(latestXid);
755 
756  /* Same with xactCompletionCount */
758 }
759 
760 /*
761  * ProcArrayGroupClearXid -- group XID clearing
762  *
763  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
764  * commit time, add ourselves to a list of processes that need their XIDs
765  * cleared. The first process to add itself to the list will acquire
766  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
767  * on behalf of all group members. This avoids a great deal of contention
768  * around ProcArrayLock when many processes are trying to commit at once,
769  * since the lock need not be repeatedly handed off from one committing
770  * process to the next.
771  */
772 static void
774 {
775  PROC_HDR *procglobal = ProcGlobal;
776  uint32 nextidx;
777  uint32 wakeidx;
778 
779  /* We should definitely have an XID to clear. */
781 
782  /* Add ourselves to the list of processes needing a group XID clear. */
783  proc->procArrayGroupMember = true;
784  proc->procArrayGroupMemberXid = latestXid;
785  nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
786  while (true)
787  {
788  pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
789 
791  &nextidx,
792  (uint32) proc->pgprocno))
793  break;
794  }
795 
796  /*
797  * If the list was not empty, the leader will clear our XID. It is
798  * impossible to have followers without a leader because the first process
799  * that has added itself to the list will always have nextidx as
800  * INVALID_PGPROCNO.
801  */
802  if (nextidx != INVALID_PGPROCNO)
803  {
804  int extraWaits = 0;
805 
806  /* Sleep until the leader clears our XID. */
808  for (;;)
809  {
810  /* acts as a read barrier */
811  PGSemaphoreLock(proc->sem);
812  if (!proc->procArrayGroupMember)
813  break;
814  extraWaits++;
815  }
817 
819 
820  /* Fix semaphore count for any absorbed wakeups */
821  while (extraWaits-- > 0)
822  PGSemaphoreUnlock(proc->sem);
823  return;
824  }
825 
826  /* We are the leader. Acquire the lock on behalf of everyone. */
827  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
828 
829  /*
830  * Now that we've got the lock, clear the list of processes waiting for
831  * group XID clearing, saving a pointer to the head of the list. Trying
832  * to pop elements one at a time could lead to an ABA problem.
833  */
834  nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
836 
837  /* Remember head of list so we can perform wakeups after dropping lock. */
838  wakeidx = nextidx;
839 
840  /* Walk the list and clear all XIDs. */
841  while (nextidx != INVALID_PGPROCNO)
842  {
843  PGPROC *nextproc = &allProcs[nextidx];
844 
846 
847  /* Move to next proc in list. */
848  nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
849  }
850 
851  /* We're done with the lock now. */
852  LWLockRelease(ProcArrayLock);
853 
854  /*
855  * Now that we've released the lock, go back and wake everybody up. We
856  * don't do this under the lock so as to keep lock hold times to a
857  * minimum. The system calls we need to perform to wake other processes
858  * up are probably much slower than the simple memory writes we did while
859  * holding the lock.
860  */
861  while (wakeidx != INVALID_PGPROCNO)
862  {
863  PGPROC *nextproc = &allProcs[wakeidx];
864 
865  wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
867 
868  /* ensure all previous writes are visible before follower continues. */
870 
871  nextproc->procArrayGroupMember = false;
872 
873  if (nextproc != MyProc)
874  PGSemaphoreUnlock(nextproc->sem);
875  }
876 }
877 
878 /*
879  * ProcArrayClearTransaction -- clear the transaction fields
880  *
881  * This is used after successfully preparing a 2-phase transaction. We are
882  * not actually reporting the transaction's XID as no longer running --- it
883  * will still appear as running because the 2PC's gxact is in the ProcArray
884  * too. We just have to clear out our own PGPROC.
885  */
886 void
888 {
889  int pgxactoff;
890 
891  /*
892  * Currently we need to lock ProcArrayLock exclusively here, as we
893  * increment xactCompletionCount below. We also need it at least in shared
894  * mode for pgproc->pgxactoff to stay the same below.
895  *
896  * We could however, as this action does not actually change anyone's view
897  * of the set of running XIDs (our entry is duplicate with the gxact that
898  * has already been inserted into the ProcArray), lower the lock level to
899  * shared if we were to make xactCompletionCount an atomic variable. But
900  * that doesn't seem worth it currently, as a 2PC commit is heavyweight
901  * enough for this not to be the bottleneck. If it ever becomes a
902  * bottleneck it may also be worth considering combining this with the
903  * subsequent ProcArrayRemove().
904  */
905  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
906 
907  pgxactoff = proc->pgxactoff;
908 
909  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
910  proc->xid = InvalidTransactionId;
911 
913  proc->xmin = InvalidTransactionId;
914  proc->recoveryConflictPending = false;
915 
917  Assert(!proc->delayChkpt);
918 
919  /*
920  * Need to increment completion count even though transaction hasn't
921  * really committed yet. The reason for that is that GetSnapshotData()
922  * omits the xid of the current transaction, thus without the increment we
923  * otherwise could end up reusing the snapshot later. Which would be bad,
924  * because it might not count the prepared transaction as running.
925  */
927 
928  /* Clear the subtransaction-XID cache too */
929  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
931  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
932  {
933  ProcGlobal->subxidStates[pgxactoff].count = 0;
934  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
935  proc->subxidStatus.count = 0;
936  proc->subxidStatus.overflowed = false;
937  }
938 
939  LWLockRelease(ProcArrayLock);
940 }
941 
942 /*
943  * Update ShmemVariableCache->latestCompletedXid to point to latestXid if
944  * currently older.
945  */
946 static void
948 {
950 
951  Assert(FullTransactionIdIsValid(cur_latest));
953  Assert(LWLockHeldByMe(ProcArrayLock));
954 
955  if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
956  {
958  FullXidRelativeTo(cur_latest, latestXid);
959  }
960 
963 }
964 
965 /*
966  * Same as MaintainLatestCompletedXid, except for use during WAL replay.
967  */
968 static void
970 {
972  FullTransactionId rel;
973 
975  Assert(LWLockHeldByMe(ProcArrayLock));
976 
977  /*
978  * Need a FullTransactionId to compare latestXid with. Can't rely on
979  * latestCompletedXid to be initialized in recovery. But in recovery it's
980  * safe to access nextXid without a lock for the startup process.
981  */
984 
985  if (!FullTransactionIdIsValid(cur_latest) ||
986  TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
987  {
989  FullXidRelativeTo(rel, latestXid);
990  }
991 
993 }
994 
995 /*
996  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
997  *
998  * Remember up to where the startup process initialized the CLOG and subtrans
999  * so we can ensure it's initialized gaplessly up to the point where necessary
1000  * while in recovery.
1001  */
1002 void
1004 {
1006  Assert(TransactionIdIsNormal(initializedUptoXID));
1007 
1008  /*
1009  * We set latestObservedXid to the xid SUBTRANS has been initialized up
1010  * to, so we can extend it from that point onwards in
1011  * RecordKnownAssignedTransactionIds, and when we get consistent in
1012  * ProcArrayApplyRecoveryInfo().
1013  */
1014  latestObservedXid = initializedUptoXID;
1016 }
1017 
1018 /*
1019  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
1020  *
1021  * Takes us through 3 states: Initialized, Pending and Ready.
1022  * Normal case is to go all the way to Ready straight away, though there
1023  * are atypical cases where we need to take it in steps.
1024  *
1025  * Use the data about running transactions on the primary to create the initial
1026  * state of KnownAssignedXids. We also use these records to regularly prune
1027  * KnownAssignedXids because we know it is possible that some transactions
1028  * with FATAL errors fail to write abort records, which could cause eventual
1029  * overflow.
1030  *
1031  * See comments for LogStandbySnapshot().
1032  */
1033 void
1035 {
1036  TransactionId *xids;
1037  int nxids;
1038  int i;
1039 
1041  Assert(TransactionIdIsValid(running->nextXid));
1044 
1045  /*
1046  * Remove stale transactions, if any.
1047  */
1049 
1050  /*
1051  * Remove stale locks, if any.
1052  */
1054 
1055  /*
1056  * If our snapshot is already valid, nothing else to do...
1057  */
1059  return;
1060 
1061  /*
1062  * If our initial RunningTransactionsData had an overflowed snapshot then
1063  * we knew we were missing some subxids from our snapshot. If we continue
1064  * to see overflowed snapshots then we might never be able to start up, so
1065  * we make another test to see if our snapshot is now valid. We know that
1066  * the missing subxids are equal to or earlier than nextXid. After we
1067  * initialise we continue to apply changes during recovery, so once the
1068  * oldestRunningXid is later than the nextXid from the initial snapshot we
1069  * know that we no longer have missing information and can mark the
1070  * snapshot as valid.
1071  */
1073  {
1074  /*
1075  * If the snapshot isn't overflowed or if it's empty we can reset our
1076  * pending state and use this snapshot instead.
1077  */
1078  if (!running->subxid_overflow || running->xcnt == 0)
1079  {
1080  /*
1081  * If we have already collected known assigned xids, we need to
1082  * throw them away before we apply the recovery snapshot.
1083  */
1086  }
1087  else
1088  {
1090  running->oldestRunningXid))
1091  {
1094  "recovery snapshots are now enabled");
1095  }
1096  else
1098  "recovery snapshot waiting for non-overflowed snapshot or "
1099  "until oldest active xid on standby is at least %u (now %u)",
1101  running->oldestRunningXid);
1102  return;
1103  }
1104  }
1105 
1107 
1108  /*
1109  * NB: this can be reached at least twice, so make sure new code can deal
1110  * with that.
1111  */
1112 
1113  /*
1114  * Nobody else is running yet, but take locks anyhow
1115  */
1116  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1117 
1118  /*
1119  * KnownAssignedXids is sorted so we cannot just add the xids, we have to
1120  * sort them first.
1121  *
1122  * Some of the new xids are top-level xids and some are subtransactions.
1123  * We don't call SubTransSetParent because it doesn't matter yet. If we
1124  * aren't overflowed then all xids will fit in snapshot and so we don't
1125  * need subtrans. If we later overflow, an xid assignment record will add
1126  * xids to subtrans. If RunningTransactionsData is overflowed then we
1127  * don't have enough information to correctly update subtrans anyway.
1128  */
1129 
1130  /*
1131  * Allocate a temporary array to avoid modifying the array passed as
1132  * argument.
1133  */
1134  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
1135 
1136  /*
1137  * Add to the temp array any xids which have not already completed.
1138  */
1139  nxids = 0;
1140  for (i = 0; i < running->xcnt + running->subxcnt; i++)
1141  {
1142  TransactionId xid = running->xids[i];
1143 
1144  /*
1145  * The running-xacts snapshot can contain xids that were still visible
1146  * in the procarray when the snapshot was taken, but were already
1147  * WAL-logged as completed. They're not running anymore, so ignore
1148  * them.
1149  */
1151  continue;
1152 
1153  xids[nxids++] = xid;
1154  }
1155 
1156  if (nxids > 0)
1157  {
1158  if (procArray->numKnownAssignedXids != 0)
1159  {
1160  LWLockRelease(ProcArrayLock);
1161  elog(ERROR, "KnownAssignedXids is not empty");
1162  }
1163 
1164  /*
1165  * Sort the array so that we can add them safely into
1166  * KnownAssignedXids.
1167  */
1168  qsort(xids, nxids, sizeof(TransactionId), xidComparator);
1169 
1170  /*
1171  * Add the sorted snapshot into KnownAssignedXids. The running-xacts
1172  * snapshot may include duplicated xids because of prepared
1173  * transactions, so ignore them.
1174  */
1175  for (i = 0; i < nxids; i++)
1176  {
1177  if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
1178  {
1179  elog(DEBUG1,
1180  "found duplicated transaction %u for KnownAssignedXids insertion",
1181  xids[i]);
1182  continue;
1183  }
1184  KnownAssignedXidsAdd(xids[i], xids[i], true);
1185  }
1186 
1188  }
1189 
1190  pfree(xids);
1191 
1192  /*
1193  * latestObservedXid is at least set to the point where SUBTRANS was
1194  * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
1195  * RecordKnownAssignedTransactionIds() was called for. Initialize
1196  * subtrans from thereon, up to nextXid - 1.
1197  *
1198  * We need to duplicate parts of RecordKnownAssignedTransactionIds() here,
1199  * because we've just added xids to the known assigned xids machinery that
1200  * haven't gone through RecordKnownAssignedTransactionIds().
1201  */
1205  {
1208  }
1209  TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
1210 
1211  /* ----------
1212  * Now we've got the running xids we need to set the global values that
1213  * are used to track snapshots as they evolve further.
1214  *
1215  * - latestCompletedXid which will be the xmax for snapshots
1216  * - lastOverflowedXid which shows whether snapshots overflow
1217  * - nextXid
1218  *
1219  * If the snapshot overflowed, then we still initialise with what we know,
1220  * but the recovery snapshot isn't fully valid yet because we know there
1221  * are some subxids missing. We don't know the specific subxids that are
1222  * missing, so conservatively assume the last one is latestObservedXid.
1223  * ----------
1224  */
1225  if (running->subxid_overflow)
1226  {
1228 
1231  }
1232  else
1233  {
1235 
1237  }
1238 
1239  /*
1240  * If a transaction wrote a commit record in the gap between taking and
1241  * logging the snapshot then latestCompletedXid may already be higher than
1242  * the value from the snapshot, so check before we use the incoming value.
1243  * It also might not yet be set at all.
1244  */
1246 
1247  /*
1248  * NB: No need to increment ShmemVariableCache->xactCompletionCount here,
1249  * nobody can see it yet.
1250  */
1251 
1252  LWLockRelease(ProcArrayLock);
1253 
1254  /* ShmemVariableCache->nextXid must be beyond any observed xid. */
1256 
1258 
1261  elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
1262  else
1264  "recovery snapshot waiting for non-overflowed snapshot or "
1265  "until oldest active xid on standby is at least %u (now %u)",
1267  running->oldestRunningXid);
1268 }
1269 
1270 /*
1271  * ProcArrayApplyXidAssignment
1272  * Process an XLOG_XACT_ASSIGNMENT WAL record
1273  */
1274 void
1276  int nsubxids, TransactionId *subxids)
1277 {
1278  TransactionId max_xid;
1279  int i;
1280 
1282 
1283  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
1284 
1285  /*
1286  * Mark all the subtransactions as observed.
1287  *
1288  * NOTE: This will fail if the subxid contains too many previously
1289  * unobserved xids to fit into known-assigned-xids. That shouldn't happen
1290  * as the code stands, because xid-assignment records should never contain
1291  * more than PGPROC_MAX_CACHED_SUBXIDS entries.
1292  */
1294 
1295  /*
1296  * Notice that we update pg_subtrans with the top-level xid, rather than
1297  * the parent xid. This is a difference between normal processing and
1298  * recovery, yet is still correct in all cases. The reason is that
1299  * subtransaction commit is not marked in clog until commit processing, so
1300  * all aborted subtransactions have already been clearly marked in clog.
1301  * As a result we are able to refer directly to the top-level
1302  * transaction's state rather than skipping through all the intermediate
1303  * states in the subtransaction tree. This should be the first time we
1304  * have attempted to SubTransSetParent().
1305  */
1306  for (i = 0; i < nsubxids; i++)
1307  SubTransSetParent(subxids[i], topxid);
1308 
1309  /* KnownAssignedXids isn't maintained yet, so we're done for now */
1311  return;
1312 
1313  /*
1314  * Uses same locking as transaction commit
1315  */
1316  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1317 
1318  /*
1319  * Remove subxids from known-assigned-xacts.
1320  */
1322 
1323  /*
1324  * Advance lastOverflowedXid to be at least the last of these subxids.
1325  */
1327  procArray->lastOverflowedXid = max_xid;
1328 
1329  LWLockRelease(ProcArrayLock);
1330 }
1331 
1332 /*
1333  * TransactionIdIsInProgress -- is given transaction running in some backend
1334  *
1335  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1336  * there are four possibilities for finding a running transaction:
1337  *
1338  * 1. The given Xid is a main transaction Id. We will find this out cheaply
1339  * by looking at ProcGlobal->xids.
1340  *
1341  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1342  * We can find this out cheaply too.
1343  *
1344  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1345  * if the Xid is running on the primary.
1346  *
1347  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1348  * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
1349  * This is the slowest way, but sadly it has to be done always if the others
1350  * failed, unless we see that the cached subxact sets are complete (none have
1351  * overflowed).
1352  *
1353  * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
1354  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1355  * This buys back some concurrency (and we can't retrieve the main Xids from
1356  * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
1357  */
1358 bool
1360 {
1361  static TransactionId *xids = NULL;
1362  static TransactionId *other_xids;
1363  XidCacheStatus *other_subxidstates;
1364  int nxids = 0;
1365  ProcArrayStruct *arrayP = procArray;
1366  TransactionId topxid;
1367  TransactionId latestCompletedXid;
1368  int mypgxactoff;
1369  int numProcs;
1370  int j;
1371 
1372  /*
1373  * Don't bother checking a transaction older than RecentXmin; it could not
1374  * possibly still be running. (Note: in particular, this guarantees that
1375  * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1376  * running.)
1377  */
1379  {
1381  return false;
1382  }
1383 
1384  /*
1385  * We may have just checked the status of this transaction, so if it is
1386  * already known to be completed, we can fall out without any access to
1387  * shared memory.
1388  */
1390  {
1392  return false;
1393  }
1394 
1395  /*
1396  * Also, we can handle our own transaction (and subtransactions) without
1397  * any access to shared memory.
1398  */
1400  {
1402  return true;
1403  }
1404 
1405  /*
1406  * If first time through, get workspace to remember main XIDs in. We
1407  * malloc it permanently to avoid repeated palloc/pfree overhead.
1408  */
1409  if (xids == NULL)
1410  {
1411  /*
1412  * In hot standby mode, reserve enough space to hold all xids in the
1413  * known-assigned list. If we later finish recovery, we no longer need
1414  * the bigger array, but we don't bother to shrink it.
1415  */
1416  int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1417 
1418  xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1419  if (xids == NULL)
1420  ereport(ERROR,
1421  (errcode(ERRCODE_OUT_OF_MEMORY),
1422  errmsg("out of memory")));
1423  }
1424 
1425  other_xids = ProcGlobal->xids;
1426  other_subxidstates = ProcGlobal->subxidStates;
1427 
1428  LWLockAcquire(ProcArrayLock, LW_SHARED);
1429 
1430  /*
1431  * Now that we have the lock, we can check latestCompletedXid; if the
1432  * target Xid is after that, it's surely still running.
1433  */
1434  latestCompletedXid =
1436  if (TransactionIdPrecedes(latestCompletedXid, xid))
1437  {
1438  LWLockRelease(ProcArrayLock);
1440  return true;
1441  }
1442 
1443  /* No shortcuts, gotta grovel through the array */
1444  mypgxactoff = MyProc->pgxactoff;
1445  numProcs = arrayP->numProcs;
1446  for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
1447  {
1448  int pgprocno;
1449  PGPROC *proc;
1450  TransactionId pxid;
1451  int pxids;
1452 
1453  /* Ignore ourselves --- dealt with it above */
1454  if (pgxactoff == mypgxactoff)
1455  continue;
1456 
1457  /* Fetch xid just once - see GetNewTransactionId */
1458  pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
1459 
1460  if (!TransactionIdIsValid(pxid))
1461  continue;
1462 
1463  /*
1464  * Step 1: check the main Xid
1465  */
1466  if (TransactionIdEquals(pxid, xid))
1467  {
1468  LWLockRelease(ProcArrayLock);
1470  return true;
1471  }
1472 
1473  /*
1474  * We can ignore main Xids that are younger than the target Xid, since
1475  * the target could not possibly be their child.
1476  */
1477  if (TransactionIdPrecedes(xid, pxid))
1478  continue;
1479 
1480  /*
1481  * Step 2: check the cached child-Xids arrays
1482  */
1483  pxids = other_subxidstates[pgxactoff].count;
1484  pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
1485  pgprocno = arrayP->pgprocnos[pgxactoff];
1486  proc = &allProcs[pgprocno];
1487  for (j = pxids - 1; j >= 0; j--)
1488  {
1489  /* Fetch xid just once - see GetNewTransactionId */
1491 
1492  if (TransactionIdEquals(cxid, xid))
1493  {
1494  LWLockRelease(ProcArrayLock);
1496  return true;
1497  }
1498  }
1499 
1500  /*
1501  * Save the main Xid for step 4. We only need to remember main Xids
1502  * that have uncached children. (Note: there is no race condition
1503  * here because the overflowed flag cannot be cleared, only set, while
1504  * we hold ProcArrayLock. So we can't miss an Xid that we need to
1505  * worry about.)
1506  */
1507  if (other_subxidstates[pgxactoff].overflowed)
1508  xids[nxids++] = pxid;
1509  }
1510 
1511  /*
1512  * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1513  * in the list must be treated as running.
1514  */
1515  if (RecoveryInProgress())
1516  {
1517  /* none of the PGPROC entries should have XIDs in hot standby mode */
1518  Assert(nxids == 0);
1519 
1520  if (KnownAssignedXidExists(xid))
1521  {
1522  LWLockRelease(ProcArrayLock);
1524  return true;
1525  }
1526 
1527  /*
1528  * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1529  * too. Fetch all xids from KnownAssignedXids that are lower than
1530  * xid, since if xid is a subtransaction its parent will always have a
1531  * lower value. Note we will collect both main and subXIDs here, but
1532  * there's no help for it.
1533  */
1535  nxids = KnownAssignedXidsGet(xids, xid);
1536  }
1537 
1538  LWLockRelease(ProcArrayLock);
1539 
1540  /*
1541  * If none of the relevant caches overflowed, we know the Xid is not
1542  * running without even looking at pg_subtrans.
1543  */
1544  if (nxids == 0)
1545  {
1547  return false;
1548  }
1549 
1550  /*
1551  * Step 4: have to check pg_subtrans.
1552  *
1553  * At this point, we know it's either a subtransaction of one of the Xids
1554  * in xids[], or it's not running. If it's an already-failed
1555  * subtransaction, we want to say "not running" even though its parent may
1556  * still be running. So first, check pg_xact to see if it's been aborted.
1557  */
1559 
1560  if (TransactionIdDidAbort(xid))
1561  return false;
1562 
1563  /*
1564  * It isn't aborted, so check whether the transaction tree it belongs to
1565  * is still running (or, more precisely, whether it was running when we
1566  * held ProcArrayLock).
1567  */
1568  topxid = SubTransGetTopmostTransaction(xid);
1569  Assert(TransactionIdIsValid(topxid));
1570  if (!TransactionIdEquals(topxid, xid))
1571  {
1572  for (int i = 0; i < nxids; i++)
1573  {
1574  if (TransactionIdEquals(xids[i], topxid))
1575  return true;
1576  }
1577  }
1578 
1579  return false;
1580 }
1581 
1582 /*
1583  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1584  *
1585  * This differs from TransactionIdIsInProgress in that it ignores prepared
1586  * transactions, as well as transactions running on the primary if we're in
1587  * hot standby. Also, we ignore subtransactions since that's not needed
1588  * for current uses.
1589  */
1590 bool
1592 {
1593  bool result = false;
1594  ProcArrayStruct *arrayP = procArray;
1595  TransactionId *other_xids = ProcGlobal->xids;
1596  int i;
1597 
1598  /*
1599  * Don't bother checking a transaction older than RecentXmin; it could not
1600  * possibly still be running.
1601  */
1603  return false;
1604 
1605  LWLockAcquire(ProcArrayLock, LW_SHARED);
1606 
1607  for (i = 0; i < arrayP->numProcs; i++)
1608  {
1609  int pgprocno = arrayP->pgprocnos[i];
1610  PGPROC *proc = &allProcs[pgprocno];
1611  TransactionId pxid;
1612 
1613  /* Fetch xid just once - see GetNewTransactionId */
1614  pxid = UINT32_ACCESS_ONCE(other_xids[i]);
1615 
1616  if (!TransactionIdIsValid(pxid))
1617  continue;
1618 
1619  if (proc->pid == 0)
1620  continue; /* ignore prepared transactions */
1621 
1622  if (TransactionIdEquals(pxid, xid))
1623  {
1624  result = true;
1625  break;
1626  }
1627  }
1628 
1629  LWLockRelease(ProcArrayLock);
1630 
1631  return result;
1632 }
1633 
1634 
1635 /*
1636  * Determine XID horizons.
1637  *
1638  * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
1639  * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
1640  * well as "internally" by GlobalVisUpdate() (see comment above struct
1641  * GlobalVisState).
1642  *
1643  * See the definition of ComputeXidHorizonsResult for the various computed
1644  * horizons.
1645  *
1646  * For VACUUM, separate horizons (used to decide which deleted tuples must
1647  * be preserved) are computed for shared and non-shared tables. For shared
1648  * relations backends in all databases must be considered, but for non-shared
1649  * relations that's not required, since only backends in my own database could
1650  * ever see the tuples in them. Also, we can ignore concurrently running lazy
1651  * VACUUMs because (a) they must be working on other tables, and (b) they
1652  * don't need to do snapshot-based lookups. Similarly, for the non-catalog
1653  * horizon, we can ignore CREATE INDEX CONCURRENTLY and REINDEX CONCURRENTLY
1654  * when they are working on non-partial, non-expressional indexes, for the
1655  * same reasons and because they can't run in transaction blocks. (They are
1656  * not possible to ignore for catalogs, because CIC and RC do some catalog
1657  * operations.) Do note that this means that CIC and RC must use a lock level
1658  * that conflicts with VACUUM.
1659  *
1660  * This also computes a horizon used to truncate pg_subtrans. For that
1661  * backends in all databases have to be considered, and concurrently running
1662  * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
1663  * accesses.
1664  *
1665  * Note: we include all currently running xids in the set of considered xids.
1666  * This ensures that if a just-started xact has not yet set its snapshot,
1667  * when it does set the snapshot it cannot set xmin less than what we compute.
1668  * See notes in src/backend/access/transam/README.
1669  *
1670  * Note: despite the above, it's possible for the calculated values to move
1671  * backwards on repeated calls. The calculated values are conservative, so
1672  * that anything older is definitely not considered as running by anyone
1673  * anymore, but the exact values calculated depend on a number of things. For
1674  * example, if there are no transactions running in the current database, the
1675  * horizon for normal tables will be latestCompletedXid. If a transaction
1676  * begins after that, its xmin will include in-progress transactions in other
1677  * databases that started earlier, so another call will return a lower value.
1678  * Nonetheless it is safe to vacuum a table in the current database with the
1679  * first result. There are also replication-related effects: a walsender
1680  * process can set its xmin based on transactions that are no longer running
1681  * on the primary but are still being replayed on the standby, thus possibly
1682  * making the values go backwards. In this case there is a possibility that
1683  * we lose data that the standby would like to have, but unless the standby
1684  * uses a replication slot to make its xmin persistent there is little we can
1685  * do about that --- data is only protected if the walsender runs continuously
1686  * while queries are executed on the standby. (The Hot Standby code deals
1687  * with such cases by failing standby queries that needed to access
1688  * already-removed data, so there's no integrity bug.) The computed values
1689  * are also adjusted with vacuum_defer_cleanup_age, so increasing that setting
1690  * on the fly is another easy way to make horizons move backwards, with no
1691  * consequences for data integrity.
1692  *
1693  * Note: the approximate horizons (see definition of GlobalVisState) are
1694  * updated by the computations done here. That's currently required for
1695  * correctness and a small optimization. Without doing so it's possible that
1696  * heap vacuum's call to heap_page_prune() uses a more conservative horizon
1697  * than later when deciding which tuples can be removed - which the code
1698  * doesn't expect (breaking HOT).
1699  */
1700 static void
1702 {
1703  ProcArrayStruct *arrayP = procArray;
1704  TransactionId kaxmin;
1705  bool in_recovery = RecoveryInProgress();
1706  TransactionId *other_xids = ProcGlobal->xids;
1707 
1708  LWLockAcquire(ProcArrayLock, LW_SHARED);
1709 
1711 
1712  /*
1713  * We initialize the MIN() calculation with latestCompletedXid + 1. This
1714  * is a lower bound for the XIDs that might appear in the ProcArray later,
1715  * and so protects us against overestimating the result due to future
1716  * additions.
1717  */
1718  {
1719  TransactionId initial;
1720 
1722  Assert(TransactionIdIsValid(initial));
1723  TransactionIdAdvance(initial);
1724 
1725  h->oldest_considered_running = initial;
1726  h->shared_oldest_nonremovable = initial;
1727  h->catalog_oldest_nonremovable = initial;
1728  h->data_oldest_nonremovable = initial;
1729 
1730  /*
1731  * Only modifications made by this backend affect the horizon for
1732  * temporary relations. Instead of a check in each iteration of the
1733  * loop over all PGPROCs it is cheaper to just initialize to the
1734  * current top-level xid, if any.
1735  *
1736  * Without an assigned xid we could use a horizon as aggressive as
1737  * ReadNewTransactionid(), but we can get away with the much cheaper
1738  * latestCompletedXid + 1: if this backend has no xid, there can't, by
1739  * definition, be any newer changes in the temp table than
1740  * latestCompletedXid.
1741  */
1744  else
1745  h->temp_oldest_nonremovable = initial;
1746  }
1747 
1748  /*
1749  * Fetch slot horizons while ProcArrayLock is held - the
1750  * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
1751  * the lock.
1752  */
1755 
1756  for (int index = 0; index < arrayP->numProcs; index++)
1757  {
1758  int pgprocno = arrayP->pgprocnos[index];
1759  PGPROC *proc = &allProcs[pgprocno];
1760  int8 statusFlags = ProcGlobal->statusFlags[index];
1761  TransactionId xid;
1762  TransactionId xmin;
1763 
1764  /* Fetch xid just once - see GetNewTransactionId */
1765  xid = UINT32_ACCESS_ONCE(other_xids[index]);
1766  xmin = UINT32_ACCESS_ONCE(proc->xmin);
1767 
1768  /*
1769  * Consider both the transaction's Xmin, and its Xid.
1770  *
1771  * We must check both because a transaction might have an Xmin but not
1772  * (yet) an Xid; conversely, if it has an Xid, that could determine
1773  * some not-yet-set Xmin.
1774  */
1775  xmin = TransactionIdOlder(xmin, xid);
1776 
1777  /* if neither is set, this proc doesn't influence the horizon */
1778  if (!TransactionIdIsValid(xmin))
1779  continue;
1780 
1781  /*
1782  * Don't ignore any procs when determining which transactions might be
1783  * considered running. While slots should ensure logical decoding
1784  * backends are protected even without this check, it can't hurt to
1785  * include them here as well.
1786  */
1789 
1790  /*
1791  * Skip over backends either vacuuming (which is ok with rows being
1792  * removed, as long as pg_subtrans is not truncated) or doing logical
1793  * decoding (which manages xmin separately, check below).
1794  */
1795  if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
1796  continue;
1797 
1798  /* shared tables need to take backends in all databases into account */
1801 
1802  /*
1803  * Normally queries in other databases are ignored for anything but
1804  * the shared horizon. But in recovery we cannot compute an accurate
1805  * per-database horizon as all xids are managed via the
1806  * KnownAssignedXids machinery.
1807  *
1808  * Be careful to compute a pessimistic value when MyDatabaseId is not
1809  * set. If this is a backend in the process of starting up, we may not
1810  * use a "too aggressive" horizon (otherwise we could end up using it
1811  * to prune still needed data away). If the current backend never
1812  * connects to a database, that is harmless, because
1813  * data_oldest_nonremovable will never be utilized.
1814  */
1815  if (in_recovery ||
1817  proc->databaseId == 0) /* always include WalSender */
1818  {
1819  /*
1820  * We can ignore this backend if it's running CREATE INDEX
1821  * CONCURRENTLY or REINDEX CONCURRENTLY on a "safe" index -- but
1822  * only on vacuums of user-defined tables.
1823  */
1824  if (!(statusFlags & PROC_IN_SAFE_IC))
1827 
1828  /* Catalog tables need to consider all backends in this db */
1831 
1832  }
1833  }
1834 
1835  /* catalog horizon should never be later than data */
1838 
1839  /*
1840  * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
1841  * after lock is released.
1842  */
1843  if (in_recovery)
1844  kaxmin = KnownAssignedXidsGetOldestXmin();
1845 
1846  /*
1847  * No other information from shared state is needed, release the lock
1848  * immediately. The rest of the computations can be done without a lock.
1849  */
1850  LWLockRelease(ProcArrayLock);
1851 
1852  if (in_recovery)
1853  {
1862  /* temp relations cannot be accessed in recovery */
1863  }
1864  else
1865  {
1866  /*
1867  * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age.
1868  *
1869  * vacuum_defer_cleanup_age provides some additional "slop" for the
1870  * benefit of hot standby queries on standby servers. This is quick
1871  * and dirty, and perhaps not all that useful unless the primary has a
1872  * predictable transaction rate, but it offers some protection when
1873  * there's no walsender connection. Note that we are assuming
1874  * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1875  * so guc.c should limit it to no more than the xidStopLimit threshold
1876  * in varsup.c. Also note that we intentionally don't apply
1877  * vacuum_defer_cleanup_age on standby servers.
1878  */
1891  /* defer doesn't apply to temp relations */
1892  }
1893 
1894  /*
1895  * Check whether there are replication slots requiring an older xmin.
1896  */
1901 
1902  /*
1903  * The only difference between catalog / data horizons is that the slot's
1904  * catalog xmin is applied to the catalog one (so catalogs can be accessed
1905  * for logical decoding). Initialize with data horizon, and then back up
1906  * further if necessary. Have to back up the shared horizon as well, since
1907  * that also can contain catalogs.
1908  */
1912  h->slot_catalog_xmin);
1915  h->slot_xmin);
1918  h->slot_catalog_xmin);
1919 
1920  /*
1921  * It's possible that slots / vacuum_defer_cleanup_age backed up the
1922  * horizons further than oldest_considered_running. Fix.
1923  */
1933 
1934  /*
1935  * shared horizons have to be at least as old as the oldest visible in
1936  * current db
1937  */
1942 
1943  /*
1944  * Horizons need to ensure that pg_subtrans access is still possible for
1945  * the relevant backends.
1946  */
1957  h->slot_xmin));
1960  h->slot_catalog_xmin));
1961 
1962  /* update approximate horizons with the computed horizons */
1964 }
1965 
1966 /*
1967  * Determine what kind of visibility horizon needs to be used for a
1968  * relation. If rel is NULL, the most conservative horizon is used.
1969  */
1970 static inline GlobalVisHorizonKind
1972 {
1973  /*
1974  * Other relkinds currently don't contain xids, nor always the necessary
1975  * logical decoding markers.
1976  */
1977  Assert(!rel ||
1978  rel->rd_rel->relkind == RELKIND_RELATION ||
1979  rel->rd_rel->relkind == RELKIND_MATVIEW ||
1980  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
1981 
1982  if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
1983  return VISHORIZON_SHARED;
1984  else if (IsCatalogRelation(rel) ||
1986  return VISHORIZON_CATALOG;
1987  else if (!RELATION_IS_LOCAL(rel))
1988  return VISHORIZON_DATA;
1989  else
1990  return VISHORIZON_TEMP;
1991 }
1992 
1993 /*
1994  * Return the oldest XID for which deleted tuples must be preserved in the
1995  * passed table.
1996  *
1997  * If rel is not NULL the horizon may be considerably more recent than
1998  * otherwise (i.e. more tuples will be removable). In the NULL case a horizon
1999  * that is correct (but not optimal) for all relations will be returned.
2000  *
2001  * This is used by VACUUM to decide which deleted tuples must be preserved in
2002  * the passed in table.
2003  */
2006 {
2007  ComputeXidHorizonsResult horizons;
2008 
2009  ComputeXidHorizons(&horizons);
2010 
2011  switch (GlobalVisHorizonKindForRel(rel))
2012  {
2013  case VISHORIZON_SHARED:
2014  return horizons.shared_oldest_nonremovable;
2015  case VISHORIZON_CATALOG:
2016  return horizons.catalog_oldest_nonremovable;
2017  case VISHORIZON_DATA:
2018  return horizons.data_oldest_nonremovable;
2019  case VISHORIZON_TEMP:
2020  return horizons.temp_oldest_nonremovable;
2021  }
2022 
2023  /* just to prevent compiler warnings */
2024  return InvalidTransactionId;
2025 }
2026 
2027 /*
2028  * Return the oldest transaction id any currently running backend might still
2029  * consider running. This should not be used for visibility / pruning
2030  * determinations (see GetOldestNonRemovableTransactionId()), but for
2031  * decisions like up to where pg_subtrans can be truncated.
2032  */
2035 {
2036  ComputeXidHorizonsResult horizons;
2037 
2038  ComputeXidHorizons(&horizons);
2039 
2040  return horizons.oldest_considered_running;
2041 }
2042 
2043 /*
2044  * Return the visibility horizons for a hot standby feedback message.
2045  */
2046 void
2048 {
2049  ComputeXidHorizonsResult horizons;
2050 
2051  ComputeXidHorizons(&horizons);
2052 
2053  /*
2054  * Don't want to use shared_oldest_nonremovable here, as that contains the
2055  * effect of replication slot's catalog_xmin. We want to send a separate
2056  * feedback for the catalog horizon, so the primary can remove data table
2057  * contents more aggressively.
2058  */
2059  *xmin = horizons.shared_oldest_nonremovable_raw;
2060  *catalog_xmin = horizons.slot_catalog_xmin;
2061 }
2062 
2063 /*
2064  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
2065  *
2066  * We have to export this for use by snapmgr.c.
2067  */
2068 int
2070 {
2071  return procArray->maxProcs;
2072 }
2073 
2074 /*
2075  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
2076  *
2077  * We have to export this for use by snapmgr.c.
2078  */
2079 int
2081 {
2082  return TOTAL_MAX_CACHED_SUBXIDS;
2083 }
2084 
2085 /*
2086  * Initialize old_snapshot_threshold specific parts of a newly built snapshot.
2087  */
2088 static void
2090 {
2092  {
2093  /*
2094  * If not using "snapshot too old" feature, fill related fields with
2095  * dummy values that don't require any locking.
2096  */
2097  snapshot->lsn = InvalidXLogRecPtr;
2098  snapshot->whenTaken = 0;
2099  }
2100  else
2101  {
2102  /*
2103  * Capture the current time and WAL stream location in case this
2104  * snapshot becomes old enough to need to fall back on the special
2105  * "old snapshot" logic.
2106  */
2107  snapshot->lsn = GetXLogInsertRecPtr();
2108  snapshot->whenTaken = GetSnapshotCurrentTimestamp();
2109  MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
2110  }
2111 }
2112 
2113 /*
2114  * Helper function for GetSnapshotData() that checks if the bulk of the
2115  * visibility information in the snapshot is still valid. If so, it updates
2116  * the fields that need to change and returns true. Otherwise it returns
2117  * false.
2118  *
2119  * This very likely can be evolved to not need ProcArrayLock held (at very
2120  * least in the case we already hold a snapshot), but that's for another day.
2121  */
2122 static bool
2124 {
2125  uint64 curXactCompletionCount;
2126 
2127  Assert(LWLockHeldByMe(ProcArrayLock));
2128 
2129  if (unlikely(snapshot->snapXactCompletionCount == 0))
2130  return false;
2131 
2132  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2133  if (curXactCompletionCount != snapshot->snapXactCompletionCount)
2134  return false;
2135 
2136  /*
2137  * If the current xactCompletionCount is still the same as it was at the
2138  * time the snapshot was built, we can be sure that rebuilding the
2139  * contents of the snapshot the hard way would result in the same snapshot
2140  * contents:
2141  *
2142  * As explained in transam/README, the set of xids considered running by
2143  * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
2144  * contents only depend on transactions with xids and xactCompletionCount
2145  * is incremented whenever a transaction with an xid finishes (while
2146  * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
2147  * ensures we would detect if the snapshot would have changed.
2148  *
2149  * As the snapshot contents are the same as it was before, it is safe to
2150  * re-enter the snapshot's xmin into the PGPROC array. None of the rows
2151  * visible under the snapshot could already have been removed (that'd
2152  * require the set of running transactions to change) and it fulfills the
2153  * requirement that concurrent GetSnapshotData() calls yield the same
2154  * xmin.
2155  */
2157  MyProc->xmin = TransactionXmin = snapshot->xmin;
2158 
2159  RecentXmin = snapshot->xmin;
2161 
2162  snapshot->curcid = GetCurrentCommandId(false);
2163  snapshot->active_count = 0;
2164  snapshot->regd_count = 0;
2165  snapshot->copied = false;
2166 
2168 
2169  return true;
2170 }
2171 
2172 /*
2173  * GetSnapshotData -- returns information about running transactions.
2174  *
2175  * The returned snapshot includes xmin (lowest still-running xact ID),
2176  * xmax (highest completed xact ID + 1), and a list of running xact IDs
2177  * in the range xmin <= xid < xmax. It is used as follows:
2178  * All xact IDs < xmin are considered finished.
2179  * All xact IDs >= xmax are considered still running.
2180  * For an xact ID xmin <= xid < xmax, consult list to see whether
2181  * it is considered running or not.
2182  * This ensures that the set of transactions seen as "running" by the
2183  * current xact will not change after it takes the snapshot.
2184  *
2185  * All running top-level XIDs are included in the snapshot, except for lazy
2186  * VACUUM processes. We also try to include running subtransaction XIDs,
2187  * but since PGPROC has only a limited cache area for subxact XIDs, full
2188  * information may not be available. If we find any overflowed subxid arrays,
2189  * we have to mark the snapshot's subxid data as overflowed, and extra work
2190  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
2191  * in heapam_visibility.c).
2192  *
2193  * We also update the following backend-global variables:
2194  * TransactionXmin: the oldest xmin of any snapshot in use in the
2195  * current transaction (this is the same as MyProc->xmin).
2196  * RecentXmin: the xmin computed for the most recent snapshot. XIDs
2197  * older than this are known not running any more.
2198  *
2199  * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
2200  * for the benefit of the GlobalVisTest* family of functions.
2201  *
2202  * Note: this function should probably not be called with an argument that's
2203  * not statically allocated (see xip allocation below).
2204  */
2205 Snapshot
2207 {
2208  ProcArrayStruct *arrayP = procArray;
2209  TransactionId *other_xids = ProcGlobal->xids;
2210  TransactionId xmin;
2211  TransactionId xmax;
2212  int count = 0;
2213  int subcount = 0;
2214  bool suboverflowed = false;
2215  FullTransactionId latest_completed;
2216  TransactionId oldestxid;
2217  int mypgxactoff;
2218  TransactionId myxid;
2219  uint64 curXactCompletionCount;
2220 
2221  TransactionId replication_slot_xmin = InvalidTransactionId;
2222  TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
2223 
2224  Assert(snapshot != NULL);
2225 
2226  /*
2227  * Allocating space for maxProcs xids is usually overkill; numProcs would
2228  * be sufficient. But it seems better to do the malloc while not holding
2229  * the lock, so we can't look at numProcs. Likewise, we allocate much
2230  * more subxip storage than is probably needed.
2231  *
2232  * This does open a possibility for avoiding repeated malloc/free: since
2233  * maxProcs does not change at runtime, we can simply reuse the previous
2234  * xip arrays if any. (This relies on the fact that all callers pass
2235  * static SnapshotData structs.)
2236  */
2237  if (snapshot->xip == NULL)
2238  {
2239  /*
2240  * First call for this snapshot. Snapshot is same size whether or not
2241  * we are in recovery, see later comments.
2242  */
2243  snapshot->xip = (TransactionId *)
2245  if (snapshot->xip == NULL)
2246  ereport(ERROR,
2247  (errcode(ERRCODE_OUT_OF_MEMORY),
2248  errmsg("out of memory")));
2249  Assert(snapshot->subxip == NULL);
2250  snapshot->subxip = (TransactionId *)
2252  if (snapshot->subxip == NULL)
2253  ereport(ERROR,
2254  (errcode(ERRCODE_OUT_OF_MEMORY),
2255  errmsg("out of memory")));
2256  }
2257 
2258  /*
2259  * It is sufficient to get shared lock on ProcArrayLock, even if we are
2260  * going to set MyProc->xmin.
2261  */
2262  LWLockAcquire(ProcArrayLock, LW_SHARED);
2263 
2264  if (GetSnapshotDataReuse(snapshot))
2265  {
2266  LWLockRelease(ProcArrayLock);
2267  return snapshot;
2268  }
2269 
2270  latest_completed = ShmemVariableCache->latestCompletedXid;
2271  mypgxactoff = MyProc->pgxactoff;
2272  myxid = other_xids[mypgxactoff];
2273  Assert(myxid == MyProc->xid);
2274 
2275  oldestxid = ShmemVariableCache->oldestXid;
2276  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2277 
2278  /* xmax is always latestCompletedXid + 1 */
2279  xmax = XidFromFullTransactionId(latest_completed);
2280  TransactionIdAdvance(xmax);
2282 
2283  /* initialize xmin calculation with xmax */
2284  xmin = xmax;
2285 
2286  /* take own xid into account, saves a check inside the loop */
2287  if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
2288  xmin = myxid;
2289 
2291 
2292  if (!snapshot->takenDuringRecovery)
2293  {
2294  int numProcs = arrayP->numProcs;
2295  TransactionId *xip = snapshot->xip;
2296  int *pgprocnos = arrayP->pgprocnos;
2297  XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
2298  uint8 *allStatusFlags = ProcGlobal->statusFlags;
2299 
2300  /*
2301  * First collect set of pgxactoff/xids that need to be included in the
2302  * snapshot.
2303  */
2304  for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
2305  {
2306  /* Fetch xid just once - see GetNewTransactionId */
2307  TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
2308  uint8 statusFlags;
2309 
2310  Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
2311 
2312  /*
2313  * If the transaction has no XID assigned, we can skip it; it
2314  * won't have sub-XIDs either.
2315  */
2316  if (likely(xid == InvalidTransactionId))
2317  continue;
2318 
2319  /*
2320  * We don't include our own XIDs (if any) in the snapshot. It
2321  * needs to be included in the xmin computation, but we did so
2322  * outside the loop.
2323  */
2324  if (pgxactoff == mypgxactoff)
2325  continue;
2326 
2327  /*
2328  * The only way we are able to get here with a non-normal xid is
2329  * during bootstrap - with this backend using
2330  * BootstrapTransactionId. But the above test should filter that
2331  * out.
2332  */
2334 
2335  /*
2336  * If the XID is >= xmax, we can skip it; such transactions will
2337  * be treated as running anyway (and any sub-XIDs will also be >=
2338  * xmax).
2339  */
2340  if (!NormalTransactionIdPrecedes(xid, xmax))
2341  continue;
2342 
2343  /*
2344  * Skip over backends doing logical decoding which manages xmin
2345  * separately (check below) and ones running LAZY VACUUM.
2346  */
2347  statusFlags = allStatusFlags[pgxactoff];
2348  if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
2349  continue;
2350 
2351  if (NormalTransactionIdPrecedes(xid, xmin))
2352  xmin = xid;
2353 
2354  /* Add XID to snapshot. */
2355  xip[count++] = xid;
2356 
2357  /*
2358  * Save subtransaction XIDs if possible (if we've already
2359  * overflowed, there's no point). Note that the subxact XIDs must
2360  * be later than their parent, so no need to check them against
2361  * xmin. We could filter against xmax, but it seems better not to
2362  * do that much work while holding the ProcArrayLock.
2363  *
2364  * The other backend can add more subxids concurrently, but cannot
2365  * remove any. Hence it's important to fetch nxids just once.
2366  * Should be safe to use memcpy, though. (We needn't worry about
2367  * missing any xids added concurrently, because they must postdate
2368  * xmax.)
2369  *
2370  * Again, our own XIDs are not included in the snapshot.
2371  */
2372  if (!suboverflowed)
2373  {
2374 
2375  if (subxidStates[pgxactoff].overflowed)
2376  suboverflowed = true;
2377  else
2378  {
2379  int nsubxids = subxidStates[pgxactoff].count;
2380 
2381  if (nsubxids > 0)
2382  {
2383  int pgprocno = pgprocnos[pgxactoff];
2384  PGPROC *proc = &allProcs[pgprocno];
2385 
2386  pg_read_barrier(); /* pairs with GetNewTransactionId */
2387 
2388  memcpy(snapshot->subxip + subcount,
2389  (void *) proc->subxids.xids,
2390  nsubxids * sizeof(TransactionId));
2391  subcount += nsubxids;
2392  }
2393  }
2394  }
2395  }
2396  }
2397  else
2398  {
2399  /*
2400  * We're in hot standby, so get XIDs from KnownAssignedXids.
2401  *
2402  * We store all xids directly into subxip[]. Here's why:
2403  *
2404  * In recovery we don't know which xids are top-level and which are
2405  * subxacts, a design choice that greatly simplifies xid processing.
2406  *
2407  * It seems like we would want to try to put xids into xip[] only, but
2408  * that is fairly small. We would either need to make that bigger or
2409  * to increase the rate at which we WAL-log xid assignment; neither is
2410  * an appealing choice.
2411  *
2412  * We could try to store xids into xip[] first and then into subxip[]
2413  * if there are too many xids. That only works if the snapshot doesn't
2414  * overflow because we do not search subxip[] in that case. A simpler
2415  * way is to just store all xids in the subxact array because this is
2416  * by far the bigger array. We just leave the xip array empty.
2417  *
2418  * Either way we need to change the way XidInMVCCSnapshot() works
2419  * depending upon when the snapshot was taken, or change normal
2420  * snapshot processing so it matches.
2421  *
2422  * Note: It is possible for recovery to end before we finish taking
2423  * the snapshot, and for newly assigned transaction ids to be added to
2424  * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
2425  * those newly added transaction ids would be filtered away, so we
2426  * need not be concerned about them.
2427  */
2428  subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
2429  xmax);
2430 
2432  suboverflowed = true;
2433  }
2434 
2435 
2436  /*
2437  * Fetch into local variable while ProcArrayLock is held - the
2438  * LWLockRelease below is a barrier, ensuring this happens inside the
2439  * lock.
2440  */
2441  replication_slot_xmin = procArray->replication_slot_xmin;
2442  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
2443 
2445  MyProc->xmin = TransactionXmin = xmin;
2446 
2447  LWLockRelease(ProcArrayLock);
2448 
2449  /* maintain state for GlobalVis* */
2450  {
2451  TransactionId def_vis_xid;
2452  TransactionId def_vis_xid_data;
2453  FullTransactionId def_vis_fxid;
2454  FullTransactionId def_vis_fxid_data;
2455  FullTransactionId oldestfxid;
2456 
2457  /*
2458  * Converting oldestXid is only safe when xid horizon cannot advance,
2459  * i.e. holding locks. While we don't hold the lock anymore, all the
2460  * necessary data has been gathered with lock held.
2461  */
2462  oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
2463 
2464  /* apply vacuum_defer_cleanup_age */
2465  def_vis_xid_data =
2467 
2468  /* Check whether there's a replication slot requiring an older xmin. */
2469  def_vis_xid_data =
2470  TransactionIdOlder(def_vis_xid_data, replication_slot_xmin);
2471 
2472  /*
2473  * Rows in non-shared, non-catalog tables possibly could be vacuumed
2474  * if older than this xid.
2475  */
2476  def_vis_xid = def_vis_xid_data;
2477 
2478  /*
2479  * Check whether there's a replication slot requiring an older catalog
2480  * xmin.
2481  */
2482  def_vis_xid =
2483  TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
2484 
2485  def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
2486  def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
2487 
2488  /*
2489  * Check if we can increase upper bound. As a previous
2490  * GlobalVisUpdate() might have computed more aggressive values, don't
2491  * overwrite them if so.
2492  */
2494  FullTransactionIdNewer(def_vis_fxid,
2497  FullTransactionIdNewer(def_vis_fxid,
2500  FullTransactionIdNewer(def_vis_fxid_data,
2502  /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
2503  if (TransactionIdIsNormal(myxid))
2505  FullXidRelativeTo(latest_completed, myxid);
2506  else
2507  {
2508  GlobalVisTempRels.definitely_needed = latest_completed;
2510  }
2511 
2512  /*
2513  * Check if we know that we can initialize or increase the lower
2514  * bound. Currently the only cheap way to do so is to use
2515  * ShmemVariableCache->oldestXid as input.
2516  *
2517  * We should definitely be able to do better. We could e.g. put a
2518  * global lower bound value into ShmemVariableCache.
2519  */
2522  oldestfxid);
2525  oldestfxid);
2528  oldestfxid);
2529  /* accurate value known */
2531  }
2532 
2533  RecentXmin = xmin;
2535 
2536  snapshot->xmin = xmin;
2537  snapshot->xmax = xmax;
2538  snapshot->xcnt = count;
2539  snapshot->subxcnt = subcount;
2540  snapshot->suboverflowed = suboverflowed;
2541  snapshot->snapXactCompletionCount = curXactCompletionCount;
2542 
2543  snapshot->curcid = GetCurrentCommandId(false);
2544 
2545  /*
2546  * This is a new snapshot, so set both refcounts to zero, and mark it as
2547  * not copied in persistent memory.
2548  */
2549  snapshot->active_count = 0;
2550  snapshot->regd_count = 0;
2551  snapshot->copied = false;
2552 
2554 
2555  return snapshot;
2556 }
2557 
2558 /*
2559  * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
2560  *
2561  * This is called when installing a snapshot imported from another
2562  * transaction. To ensure that OldestXmin doesn't go backwards, we must
2563  * check that the source transaction is still running, and we'd better do
2564  * that atomically with installing the new xmin.
2565  *
2566  * Returns true if successful, false if source xact is no longer running.
2567  */
2568 bool
2570  VirtualTransactionId *sourcevxid)
2571 {
2572  bool result = false;
2573  ProcArrayStruct *arrayP = procArray;
2574  int index;
2575 
2577  if (!sourcevxid)
2578  return false;
2579 
2580  /* Get lock so source xact can't end while we're doing this */
2581  LWLockAcquire(ProcArrayLock, LW_SHARED);
2582 
2583  for (index = 0; index < arrayP->numProcs; index++)
2584  {
2585  int pgprocno = arrayP->pgprocnos[index];
2586  PGPROC *proc = &allProcs[pgprocno];
2587  int statusFlags = ProcGlobal->statusFlags[index];
2588  TransactionId xid;
2589 
2590  /* Ignore procs running LAZY VACUUM */
2591  if (statusFlags & PROC_IN_VACUUM)
2592  continue;
2593 
2594  /* We are only interested in the specific virtual transaction. */
2595  if (proc->backendId != sourcevxid->backendId)
2596  continue;
2597  if (proc->lxid != sourcevxid->localTransactionId)
2598  continue;
2599 
2600  /*
2601  * We check the transaction's database ID for paranoia's sake: if it's
2602  * in another DB then its xmin does not cover us. Caller should have
2603  * detected this already, so we just treat any funny cases as
2604  * "transaction not found".
2605  */
2606  if (proc->databaseId != MyDatabaseId)
2607  continue;
2608 
2609  /*
2610  * Likewise, let's just make real sure its xmin does cover us.
2611  */
2612  xid = UINT32_ACCESS_ONCE(proc->xmin);
2613  if (!TransactionIdIsNormal(xid) ||
2614  !TransactionIdPrecedesOrEquals(xid, xmin))
2615  continue;
2616 
2617  /*
2618  * We're good. Install the new xmin. As in GetSnapshotData, set
2619  * TransactionXmin too. (Note that because snapmgr.c called
2620  * GetSnapshotData first, we'll be overwriting a valid xmin here, so
2621  * we don't check that.)
2622  */
2623  MyProc->xmin = TransactionXmin = xmin;
2624 
2625  result = true;
2626  break;
2627  }
2628 
2629  LWLockRelease(ProcArrayLock);
2630 
2631  return result;
2632 }
2633 
2634 /*
2635  * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
2636  *
2637  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
2638  * PGPROC of the transaction from which we imported the snapshot, rather than
2639  * an XID.
2640  *
2641  * Note that this function also copies statusFlags from the source `proc` in
2642  * order to avoid the case where MyProc's xmin needs to be skipped for
2643  * computing xid horizon.
2644  *
2645  * Returns true if successful, false if source xact is no longer running.
2646  */
2647 bool
2649 {
2650  bool result = false;
2651  TransactionId xid;
2652 
2654  Assert(proc != NULL);
2655 
2656  /*
2657  * Get an exclusive lock so that we can copy statusFlags from source proc.
2658  */
2659  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2660 
2661  /*
2662  * Be certain that the referenced PGPROC has an advertised xmin which is
2663  * no later than the one we're installing, so that the system-wide xmin
2664  * can't go backwards. Also, make sure it's running in the same database,
2665  * so that the per-database xmin cannot go backwards.
2666  */
2667  xid = UINT32_ACCESS_ONCE(proc->xmin);
2668  if (proc->databaseId == MyDatabaseId &&
2669  TransactionIdIsNormal(xid) &&
2670  TransactionIdPrecedesOrEquals(xid, xmin))
2671  {
2672  /* Install xmin */
2673  MyProc->xmin = TransactionXmin = xmin;
2674 
2675  /* Flags being copied must be valid copy-able flags. */
2676  Assert((proc->statusFlags & (~PROC_COPYABLE_FLAGS)) == 0);
2677  MyProc->statusFlags = proc->statusFlags;
2679 
2680  result = true;
2681  }
2682 
2683  LWLockRelease(ProcArrayLock);
2684 
2685  return result;
2686 }
2687 
2688 /*
2689  * GetRunningTransactionData -- returns information about running transactions.
2690  *
2691  * Similar to GetSnapshotData but returns more information. We include
2692  * all PGPROCs with an assigned TransactionId, even VACUUM processes and
2693  * prepared transactions.
2694  *
2695  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
2696  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
2697  * array until the caller has WAL-logged this snapshot, and releases the
2698  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
2699  * lock is released.
2700  *
2701  * The returned data structure is statically allocated; caller should not
2702  * modify it, and must not assume it is valid past the next call.
2703  *
2704  * This is never executed during recovery so there is no need to look at
2705  * KnownAssignedXids.
2706  *
2707  * Dummy PGPROCs from prepared transactions are included, meaning that this
2708  * may return entries with duplicated TransactionId values coming from
2709  * transactions that are finishing PREPARE. Nothing is done about duplicated
2710  * entries here, so as not to hold ProcArrayLock longer than necessary.
2711  *
2712  * We don't worry about updating other counters, we want to keep this as
2713  * simple as possible and leave GetSnapshotData() as the primary code for
2714  * that bookkeeping.
2715  *
2716  * Note that if any transaction has overflowed its cached subtransactions
2717  * then there is no real need to include any subtransactions.
2718  */
2721 {
2722  /* result workspace */
2723  static RunningTransactionsData CurrentRunningXactsData;
2724 
2725  ProcArrayStruct *arrayP = procArray;
2726  TransactionId *other_xids = ProcGlobal->xids;
2727  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
2728  TransactionId latestCompletedXid;
2729  TransactionId oldestRunningXid;
2730  TransactionId *xids;
2731  int index;
2732  int count;
2733  int subcount;
2734  bool suboverflowed;
2735 
2737 
2738  /*
2739  * Allocating space for maxProcs xids is usually overkill; numProcs would
2740  * be sufficient. But it seems better to do the malloc while not holding
2741  * the lock, so we can't look at numProcs. Likewise, we allocate much
2742  * more subxip storage than is probably needed.
2743  *
2744  * Should only be allocated in bgwriter, since only ever executed during
2745  * checkpoints.
2746  */
2747  if (CurrentRunningXacts->xids == NULL)
2748  {
2749  /*
2750  * First call
2751  */
2752  CurrentRunningXacts->xids = (TransactionId *)
2754  if (CurrentRunningXacts->xids == NULL)
2755  ereport(ERROR,
2756  (errcode(ERRCODE_OUT_OF_MEMORY),
2757  errmsg("out of memory")));
2758  }
2759 
2760  xids = CurrentRunningXacts->xids;
2761 
2762  count = subcount = 0;
2763  suboverflowed = false;
2764 
2765  /*
2766  * Ensure that no xids enter or leave the procarray while we obtain
2767  * snapshot.
2768  */
2769  LWLockAcquire(ProcArrayLock, LW_SHARED);
2770  LWLockAcquire(XidGenLock, LW_SHARED);
2771 
2772  latestCompletedXid =
2774  oldestRunningXid =
2776 
2777  /*
2778  * Spin over procArray collecting all xids
2779  */
2780  for (index = 0; index < arrayP->numProcs; index++)
2781  {
2782  TransactionId xid;
2783 
2784  /* Fetch xid just once - see GetNewTransactionId */
2785  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2786 
2787  /*
2788  * We don't need to store transactions that don't have a TransactionId
2789  * yet because they will not show as running on a standby server.
2790  */
2791  if (!TransactionIdIsValid(xid))
2792  continue;
2793 
2794  /*
2795  * Be careful not to exclude any xids before calculating the values of
2796  * oldestRunningXid and suboverflowed, since these are used to clean
2797  * up transaction information held on standbys.
2798  */
2799  if (TransactionIdPrecedes(xid, oldestRunningXid))
2800  oldestRunningXid = xid;
2801 
2803  suboverflowed = true;
2804 
2805  /*
2806  * If we wished to exclude xids this would be the right place for it.
2807  * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2808  * but they do during truncation at the end when they get the lock and
2809  * truncate, so it is not much of a problem to include them if they
2810  * are seen and it is cleaner to include them.
2811  */
2812 
2813  xids[count++] = xid;
2814  }
2815 
2816  /*
2817  * Spin over procArray collecting all subxids, but only if there hasn't
2818  * been a suboverflow.
2819  */
2820  if (!suboverflowed)
2821  {
2822  XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
2823 
2824  for (index = 0; index < arrayP->numProcs; index++)
2825  {
2826  int pgprocno = arrayP->pgprocnos[index];
2827  PGPROC *proc = &allProcs[pgprocno];
2828  int nsubxids;
2829 
2830  /*
2831  * Save subtransaction XIDs. Other backends can't add or remove
2832  * entries while we're holding XidGenLock.
2833  */
2834  nsubxids = other_subxidstates[index].count;
2835  if (nsubxids > 0)
2836  {
2837  /* barrier not really required, as XidGenLock is held, but ... */
2838  pg_read_barrier(); /* pairs with GetNewTransactionId */
2839 
2840  memcpy(&xids[count], (void *) proc->subxids.xids,
2841  nsubxids * sizeof(TransactionId));
2842  count += nsubxids;
2843  subcount += nsubxids;
2844 
2845  /*
2846  * Top-level XID of a transaction is always less than any of
2847  * its subxids, so we don't need to check if any of the
2848  * subxids are smaller than oldestRunningXid
2849  */
2850  }
2851  }
2852  }
2853 
2854  /*
2855  * It's important *not* to include the limits set by slots here because
2856  * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2857  * were to be included here the initial value could never increase because
2858  * of a circular dependency where slots only increase their limits when
2859  * running xacts increases oldestRunningXid and running xacts only
2860  * increases if slots do.
2861  */
2862 
2863  CurrentRunningXacts->xcnt = count - subcount;
2864  CurrentRunningXacts->subxcnt = subcount;
2865  CurrentRunningXacts->subxid_overflow = suboverflowed;
2867  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2868  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2869 
2870  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2871  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2872  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2873 
2874  /* We don't release the locks here, the caller is responsible for that */
2875 
2876  return CurrentRunningXacts;
2877 }
2878 
2879 /*
2880  * GetOldestActiveTransactionId()
2881  *
2882  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2883  * all PGPROCs with an assigned TransactionId, even VACUUM processes.
2884  * We look at all databases, though there is no need to include WALSender
2885  * since this has no effect on hot standby conflicts.
2886  *
2887  * This is never executed during recovery so there is no need to look at
2888  * KnownAssignedXids.
2889  *
2890  * We don't worry about updating other counters, we want to keep this as
2891  * simple as possible and leave GetSnapshotData() as the primary code for
2892  * that bookkeeping.
2893  */
2896 {
2897  ProcArrayStruct *arrayP = procArray;
2898  TransactionId *other_xids = ProcGlobal->xids;
2899  TransactionId oldestRunningXid;
2900  int index;
2901 
2903 
2904  /*
2905  * Read nextXid, as the upper bound of what's still active.
2906  *
2907  * Reading a TransactionId is atomic, but we must grab the lock to make
2908  * sure that all XIDs < nextXid are already present in the proc array (or
2909  * have already completed), when we spin over it.
2910  */
2911  LWLockAcquire(XidGenLock, LW_SHARED);
2913  LWLockRelease(XidGenLock);
2914 
2915  /*
2916  * Spin over procArray collecting all xids and subxids.
2917  */
2918  LWLockAcquire(ProcArrayLock, LW_SHARED);
2919  for (index = 0; index < arrayP->numProcs; index++)
2920  {
2921  TransactionId xid;
2922 
2923  /* Fetch xid just once - see GetNewTransactionId */
2924  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2925 
2926  if (!TransactionIdIsNormal(xid))
2927  continue;
2928 
2929  if (TransactionIdPrecedes(xid, oldestRunningXid))
2930  oldestRunningXid = xid;
2931 
2932  /*
2933  * Top-level XID of a transaction is always less than any of its
2934  * subxids, so we don't need to check if any of the subxids are
2935  * smaller than oldestRunningXid
2936  */
2937  }
2938  LWLockRelease(ProcArrayLock);
2939 
2940  return oldestRunningXid;
2941 }
2942 
2943 /*
2944  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2945  *
2946  * Returns the oldest xid that we can guarantee not to have been affected by
2947  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2948  * transaction aborted. Note that the value can (and most of the time will) be
2949  * much more conservative than what really has been affected by vacuum, but we
2950  * currently don't have better data available.
2951  *
2952  * This is useful to initialize the cutoff xid after which a new changeset
2953  * extraction replication slot can start decoding changes.
2954  *
2955  * Must be called with ProcArrayLock held either shared or exclusively,
2956  * although most callers will want to use exclusive mode since it is expected
2957  * that the caller will immediately use the xid to peg the xmin horizon.
2958  */
2961 {
2962  ProcArrayStruct *arrayP = procArray;
2963  TransactionId oldestSafeXid;
2964  int index;
2965  bool recovery_in_progress = RecoveryInProgress();
2966 
2967  Assert(LWLockHeldByMe(ProcArrayLock));
2968 
2969  /*
2970  * Acquire XidGenLock, so no transactions can acquire an xid while we're
2971  * running. If no transaction with xid were running concurrently a new xid
2972  * could influence the RecentXmin et al.
2973  *
2974  * We initialize the computation to nextXid since that's guaranteed to be
2975  * a safe, albeit pessimal, value.
2976  */
2977  LWLockAcquire(XidGenLock, LW_SHARED);
2979 
2980  /*
2981  * If there's already a slot pegging the xmin horizon, we can start with
2982  * that value, it's guaranteed to be safe since it's computed by this
2983  * routine initially and has been enforced since. We can always use the
2984  * slot's general xmin horizon, but the catalog horizon is only usable
2985  * when only catalog data is going to be looked at.
2986  */
2989  oldestSafeXid))
2990  oldestSafeXid = procArray->replication_slot_xmin;
2991 
2992  if (catalogOnly &&
2995  oldestSafeXid))
2996  oldestSafeXid = procArray->replication_slot_catalog_xmin;
2997 
2998  /*
2999  * If we're not in recovery, we walk over the procarray and collect the
3000  * lowest xid. Since we're called with ProcArrayLock held and have
3001  * acquired XidGenLock, no entries can vanish concurrently, since
3002  * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
3003  * with ProcArrayLock held.
3004  *
3005  * In recovery we can't lower the safe value besides what we've computed
3006  * above, so we'll have to wait a bit longer there. We unfortunately can
3007  * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
3008  * machinery can miss values and return an older value than is safe.
3009  */
3010  if (!recovery_in_progress)
3011  {
3012  TransactionId *other_xids = ProcGlobal->xids;
3013 
3014  /*
3015  * Spin over procArray collecting min(ProcGlobal->xids[i])
3016  */
3017  for (index = 0; index < arrayP->numProcs; index++)
3018  {
3019  TransactionId xid;
3020 
3021  /* Fetch xid just once - see GetNewTransactionId */
3022  xid = UINT32_ACCESS_ONCE(other_xids[index]);
3023 
3024  if (!TransactionIdIsNormal(xid))
3025  continue;
3026 
3027  if (TransactionIdPrecedes(xid, oldestSafeXid))
3028  oldestSafeXid = xid;
3029  }
3030  }
3031 
3032  LWLockRelease(XidGenLock);
3033 
3034  return oldestSafeXid;
3035 }
3036 
3037 /*
3038  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
3039  * delaying checkpoint because they have critical actions in progress.
3040  *
3041  * Constructs an array of VXIDs of transactions that are currently in commit
3042  * critical sections, as shown by having delayChkpt set in their PGPROC.
3043  *
3044  * Returns a palloc'd array that should be freed by the caller.
3045  * *nvxids is the number of valid entries.
3046  *
3047  * Note that because backends set or clear delayChkpt without holding any lock,
3048  * the result is somewhat indeterminate, but we don't really care. Even in
3049  * a multiprocessor with delayed writes to shared memory, it should be certain
3050  * that setting of delayChkpt will propagate to shared memory when the backend
3051  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
3052  * it's already inserted its commit record. Whether it takes a little while
3053  * for clearing of delayChkpt to propagate is unimportant for correctness.
3054  */
3057 {
3058  VirtualTransactionId *vxids;
3059  ProcArrayStruct *arrayP = procArray;
3060  int count = 0;
3061  int index;
3062 
3063  /* allocate what's certainly enough result space */
3064  vxids = (VirtualTransactionId *)
3065  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
3066 
3067  LWLockAcquire(ProcArrayLock, LW_SHARED);
3068 
3069  for (index = 0; index < arrayP->numProcs; index++)
3070  {
3071  int pgprocno = arrayP->pgprocnos[index];
3072  PGPROC *proc = &allProcs[pgprocno];
3073 
3074  if (proc->delayChkpt)
3075  {
3076  VirtualTransactionId vxid;
3077 
3078  GET_VXID_FROM_PGPROC(vxid, *proc);
3079  if (VirtualTransactionIdIsValid(vxid))
3080  vxids[count++] = vxid;
3081  }
3082  }
3083 
3084  LWLockRelease(ProcArrayLock);
3085 
3086  *nvxids = count;
3087  return vxids;
3088 }
3089 
3090 /*
3091  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
3092  *
3093  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
3094  * of the specified VXIDs are still in critical sections of code.
3095  *
3096  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
3097  * those numbers should be small enough for it not to be a problem.
3098  */
3099 bool
3101 {
3102  bool result = false;
3103  ProcArrayStruct *arrayP = procArray;
3104  int index;
3105 
3106  LWLockAcquire(ProcArrayLock, LW_SHARED);
3107 
3108  for (index = 0; index < arrayP->numProcs; index++)
3109  {
3110  int pgprocno = arrayP->pgprocnos[index];
3111  PGPROC *proc = &allProcs[pgprocno];
3112  VirtualTransactionId vxid;
3113 
3114  GET_VXID_FROM_PGPROC(vxid, *proc);
3115 
3116  if (proc->delayChkpt && VirtualTransactionIdIsValid(vxid))
3117  {
3118  int i;
3119 
3120  for (i = 0; i < nvxids; i++)
3121  {
3122  if (VirtualTransactionIdEquals(vxid, vxids[i]))
3123  {
3124  result = true;
3125  break;
3126  }
3127  }
3128  if (result)
3129  break;
3130  }
3131  }
3132 
3133  LWLockRelease(ProcArrayLock);
3134 
3135  return result;
3136 }
3137 
3138 /*
3139  * BackendPidGetProc -- get a backend's PGPROC given its PID
3140  *
3141  * Returns NULL if not found. Note that it is up to the caller to be
3142  * sure that the question remains meaningful for long enough for the
3143  * answer to be used ...
3144  */
3145 PGPROC *
3147 {
3148  PGPROC *result;
3149 
3150  if (pid == 0) /* never match dummy PGPROCs */
3151  return NULL;
3152 
3153  LWLockAcquire(ProcArrayLock, LW_SHARED);
3154 
3155  result = BackendPidGetProcWithLock(pid);
3156 
3157  LWLockRelease(ProcArrayLock);
3158 
3159  return result;
3160 }
3161 
3162 /*
3163  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
3164  *
3165  * Same as above, except caller must be holding ProcArrayLock. The found
3166  * entry, if any, can be assumed to be valid as long as the lock remains held.
3167  */
3168 PGPROC *
3170 {
3171  PGPROC *result = NULL;
3172  ProcArrayStruct *arrayP = procArray;
3173  int index;
3174 
3175  if (pid == 0) /* never match dummy PGPROCs */
3176  return NULL;
3177 
3178  for (index = 0; index < arrayP->numProcs; index++)
3179  {
3180  PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
3181 
3182  if (proc->pid == pid)
3183  {
3184  result = proc;
3185  break;
3186  }
3187  }
3188 
3189  return result;
3190 }
3191 
3192 /*
3193  * BackendXidGetPid -- get a backend's pid given its XID
3194  *
3195  * Returns 0 if not found or it's a prepared transaction. Note that
3196  * it is up to the caller to be sure that the question remains
3197  * meaningful for long enough for the answer to be used ...
3198  *
3199  * Only main transaction Ids are considered. This function is mainly
3200  * useful for determining what backend owns a lock.
3201  *
3202  * Beware that not every xact has an XID assigned. However, as long as you
3203  * only call this using an XID found on disk, you're safe.
3204  */
3205 int
3207 {
3208  int result = 0;
3209  ProcArrayStruct *arrayP = procArray;
3210  TransactionId *other_xids = ProcGlobal->xids;
3211  int index;
3212 
3213  if (xid == InvalidTransactionId) /* never match invalid xid */
3214  return 0;
3215 
3216  LWLockAcquire(ProcArrayLock, LW_SHARED);
3217 
3218  for (index = 0; index < arrayP->numProcs; index++)
3219  {
3220  int pgprocno = arrayP->pgprocnos[index];
3221  PGPROC *proc = &allProcs[pgprocno];
3222 
3223  if (other_xids[index] == xid)
3224  {
3225  result = proc->pid;
3226  break;
3227  }
3228  }
3229 
3230  LWLockRelease(ProcArrayLock);
3231 
3232  return result;
3233 }
3234 
/*
 * IsBackendPid -- is a given pid a running backend
 *
 * Returns true exactly when BackendPidGetProc() finds an entry for pid.
 *
 * This is not called by the backend, but is called by external modules.
 */
bool
IsBackendPid(int pid)
{
	return (BackendPidGetProc(pid) != NULL);
}
3245 
3246 
3247 /*
3248  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
3249  *
3250  * The array is palloc'd. The number of valid entries is returned into *nvxids.
3251  *
3252  * The arguments allow filtering the set of VXIDs returned. Our own process
3253  * is always skipped. In addition:
3254  * If limitXmin is not InvalidTransactionId, skip processes with
3255  * xmin > limitXmin.
3256  * If excludeXmin0 is true, skip processes with xmin = 0.
3257  * If allDbs is false, skip processes attached to other databases.
3258  * If excludeVacuum isn't zero, skip processes for which
3259  * (statusFlags & excludeVacuum) is not zero.
3260  *
3261  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
3262  * allow skipping backends whose oldest live snapshot is no older than
3263  * some snapshot we have. Since we examine the procarray with only shared
3264  * lock, there are race conditions: a backend could set its xmin just after
3265  * we look. Indeed, on multiprocessors with weak memory ordering, the
3266  * other backend could have set its xmin *before* we look. We know however
3267  * that such a backend must have held shared ProcArrayLock overlapping our
3268  * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
3269  * any snapshot the other backend is taking concurrently with our scan cannot
3270  * consider any transactions as still running that we think are committed
3271  * (since backends must hold ProcArrayLock exclusive to commit).
3272  */
3274 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
3275  bool allDbs, int excludeVacuum,
3276  int *nvxids)
3277 {
3278  VirtualTransactionId *vxids;
3279  ProcArrayStruct *arrayP = procArray;
3280  int count = 0;
3281  int index;
3282 
3283  /* allocate what's certainly enough result space */
3284  vxids = (VirtualTransactionId *)
3285  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
3286 
3287  LWLockAcquire(ProcArrayLock, LW_SHARED);
3288 
3289  for (index = 0; index < arrayP->numProcs; index++)
3290  {
3291  int pgprocno = arrayP->pgprocnos[index];
3292  PGPROC *proc = &allProcs[pgprocno];
3293  uint8 statusFlags = ProcGlobal->statusFlags[index];
3294 
3295  if (proc == MyProc)
3296  continue;
3297 
3298  if (excludeVacuum & statusFlags)
3299  continue;
3300 
3301  if (allDbs || proc->databaseId == MyDatabaseId)
3302  {
3303  /* Fetch xmin just once - might change on us */
3304  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3305 
3306  if (excludeXmin0 && !TransactionIdIsValid(pxmin))
3307  continue;
3308 
3309  /*
3310  * InvalidTransactionId precedes all other XIDs, so a proc that
3311  * hasn't set xmin yet will not be rejected by this test.
3312  */
3313  if (!TransactionIdIsValid(limitXmin) ||
3314  TransactionIdPrecedesOrEquals(pxmin, limitXmin))
3315  {
3316  VirtualTransactionId vxid;
3317 
3318  GET_VXID_FROM_PGPROC(vxid, *proc);
3319  if (VirtualTransactionIdIsValid(vxid))
3320  vxids[count++] = vxid;
3321  }
3322  }
3323  }
3324 
3325  LWLockRelease(ProcArrayLock);
3326 
3327  *nvxids = count;
3328  return vxids;
3329 }
3330 
3331 /*
3332  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
3333  *
3334  * Usage is limited to conflict resolution during recovery on standby servers.
3335  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
3336  * in cases where we cannot accurately determine a value for latestRemovedXid.
3337  *
3338  * If limitXmin is InvalidTransactionId then we want to kill everybody,
3339  * so we're not worried if they have a snapshot or not, nor does it really
3340  * matter what type of lock we hold.
3341  *
3342  * All callers that are checking xmins always now supply a valid and useful
3343  * value for limitXmin. The limitXmin is always lower than the lowest
3344  * numbered KnownAssignedXid that is not already a FATAL error. This is
3345  * because we only care about cleanup records that are cleaning up tuple
3346  * versions from committed transactions. In that case they will only occur
3347  * at the point where the record is less than the lowest running xid. That
3348  * allows us to say that if any backend takes a snapshot concurrently with
3349  * us then the conflict assessment made here would never include the snapshot
3350  * that is being derived. So we take LW_SHARED on the ProcArray and allow
3351  * concurrent snapshots when limitXmin is valid. We might think about adding
3352  * Assert(limitXmin < lowest(KnownAssignedXids))
3353  * but that would not be true in the case of FATAL errors lagging in array,
3354  * but we already know those are bogus anyway, so we skip that test.
3355  *
3356  * If dbOid is valid we skip backends attached to other databases.
3357  *
3358  * Be careful to *not* pfree the result from this function. We reuse
3359  * this array sufficiently often that we use malloc for the result.
3360  */
3363 {
3364  static VirtualTransactionId *vxids;
3365  ProcArrayStruct *arrayP = procArray;
3366  int count = 0;
3367  int index;
3368 
3369  /*
3370  * If first time through, get workspace to remember main XIDs in. We
3371  * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
3372  * result space, remembering room for a terminator.
3373  */
3374  if (vxids == NULL)
3375  {
3376  vxids = (VirtualTransactionId *)
3377  malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
3378  if (vxids == NULL)
3379  ereport(ERROR,
3380  (errcode(ERRCODE_OUT_OF_MEMORY),
3381  errmsg("out of memory")));
3382  }
3383 
3384  LWLockAcquire(ProcArrayLock, LW_SHARED);
3385 
3386  for (index = 0; index < arrayP->numProcs; index++)
3387  {
3388  int pgprocno = arrayP->pgprocnos[index];
3389  PGPROC *proc = &allProcs[pgprocno];
3390 
3391  /* Exclude prepared transactions */
3392  if (proc->pid == 0)
3393  continue;
3394 
3395  if (!OidIsValid(dbOid) ||
3396  proc->databaseId == dbOid)
3397  {
3398  /* Fetch xmin just once - can't change on us, but good coding */
3399  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3400 
3401  /*
3402  * We ignore an invalid pxmin because this means that backend has
3403  * no snapshot currently. We hold a Share lock to avoid contention
3404  * with users taking snapshots. That is not a problem because the
3405  * current xmin is always at least one higher than the latest
3406  * removed xid, so any new snapshot would never conflict with the
3407  * test here.
3408  */
3409  if (!TransactionIdIsValid(limitXmin) ||
3410  (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
3411  {
3412  VirtualTransactionId vxid;
3413 
3414  GET_VXID_FROM_PGPROC(vxid, *proc);
3415  if (VirtualTransactionIdIsValid(vxid))
3416  vxids[count++] = vxid;
3417  }
3418  }
3419  }
3420 
3421  LWLockRelease(ProcArrayLock);
3422 
3423  /* add the terminator */
3424  vxids[count].backendId = InvalidBackendId;
3426 
3427  return vxids;
3428 }
3429 
3430 /*
3431  * CancelVirtualTransaction - used in recovery conflict processing
3432  *
3433  * Returns pid of the process signaled, or 0 if not found.
3434  */
3435 pid_t
3437 {
3438  return SignalVirtualTransaction(vxid, sigmode, true);
3439 }
3440 
3441 pid_t
3443  bool conflictPending)
3444 {
3445  ProcArrayStruct *arrayP = procArray;
3446  int index;
3447  pid_t pid = 0;
3448 
3449  LWLockAcquire(ProcArrayLock, LW_SHARED);
3450 
3451  for (index = 0; index < arrayP->numProcs; index++)
3452  {
3453  int pgprocno = arrayP->pgprocnos[index];
3454  PGPROC *proc = &allProcs[pgprocno];
3455  VirtualTransactionId procvxid;
3456 
3457  GET_VXID_FROM_PGPROC(procvxid, *proc);
3458 
3459  if (procvxid.backendId == vxid.backendId &&
3460  procvxid.localTransactionId == vxid.localTransactionId)
3461  {
3462  proc->recoveryConflictPending = conflictPending;
3463  pid = proc->pid;
3464  if (pid != 0)
3465  {
3466  /*
3467  * Kill the pid if it's still here. If not, that's what we
3468  * wanted so ignore any errors.
3469  */
3470  (void) SendProcSignal(pid, sigmode, vxid.backendId);
3471  }
3472  break;
3473  }
3474  }
3475 
3476  LWLockRelease(ProcArrayLock);
3477 
3478  return pid;
3479 }
3480 
3481 /*
3482  * MinimumActiveBackends --- count backends (other than myself) that are
3483  * in active transactions. Return true if the count exceeds the
3484  * minimum threshold passed. This is used as a heuristic to decide if
3485  * a pre-XLOG-flush delay is worthwhile during commit.
3486  *
3487  * Do not count backends that are blocked waiting for locks, since they are
3488  * not going to get to run until someone else commits.
3489  */
3490 bool
3492 {
3493  ProcArrayStruct *arrayP = procArray;
3494  int count = 0;
3495  int index;
3496 
3497  /* Quick short-circuit if no minimum is specified */
3498  if (min == 0)
3499  return true;
3500 
3501  /*
3502  * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
3503  * bogus, but since we are only testing fields for zero or nonzero, it
3504  * should be OK. The result is only used for heuristic purposes anyway...
3505  */
3506  for (index = 0; index < arrayP->numProcs; index++)
3507  {
3508  int pgprocno = arrayP->pgprocnos[index];
3509  PGPROC *proc = &allProcs[pgprocno];
3510 
3511  /*
3512  * Since we're not holding a lock, need to be prepared to deal with
3513  * garbage, as someone could have incremented numProcs but not yet
3514  * filled the structure.
3515  *
3516  * If someone just decremented numProcs, 'proc' could also point to a
3517  * PGPROC entry that's no longer in the array. It still points to a
3518  * PGPROC struct, though, because freed PGPROC entries just go to the
3519  * free list and are recycled. Its contents are nonsense in that case,
3520  * but that's acceptable for this function.
3521  */
3522  if (pgprocno == -1)
3523  continue; /* do not count deleted entries */
3524  if (proc == MyProc)
3525  continue; /* do not count myself */
3526  if (proc->xid == InvalidTransactionId)
3527  continue; /* do not count if no XID assigned */
3528  if (proc->pid == 0)
3529  continue; /* do not count prepared xacts */
3530  if (proc->waitLock != NULL)
3531  continue; /* do not count if blocked on a lock */
3532  count++;
3533  if (count >= min)
3534  break;
3535  }
3536 
3537  return count >= min;
3538 }
3539 
3540 /*
3541  * CountDBBackends --- count backends that are using specified database
3542  */
3543 int
3545 {
3546  ProcArrayStruct *arrayP = procArray;
3547  int count = 0;
3548  int index;
3549 
3550  LWLockAcquire(ProcArrayLock, LW_SHARED);
3551 
3552  for (index = 0; index < arrayP->numProcs; index++)
3553  {
3554  int pgprocno = arrayP->pgprocnos[index];
3555  PGPROC *proc = &allProcs[pgprocno];
3556 
3557  if (proc->pid == 0)
3558  continue; /* do not count prepared xacts */
3559  if (!OidIsValid(databaseid) ||
3560  proc->databaseId == databaseid)
3561  count++;
3562  }
3563 
3564  LWLockRelease(ProcArrayLock);
3565 
3566  return count;
3567 }
3568 
3569 /*
3570  * CountDBConnections --- counts database backends ignoring any background
3571  * worker processes
3572  */
3573 int
3575 {
3576  ProcArrayStruct *arrayP = procArray;
3577  int count = 0;
3578  int index;
3579 
3580  LWLockAcquire(ProcArrayLock, LW_SHARED);
3581 
3582  for (index = 0; index < arrayP->numProcs; index++)
3583  {
3584  int pgprocno = arrayP->pgprocnos[index];
3585  PGPROC *proc = &allProcs[pgprocno];
3586 
3587  if (proc->pid == 0)
3588  continue; /* do not count prepared xacts */
3589  if (proc->isBackgroundWorker)
3590  continue; /* do not count background workers */
3591  if (!OidIsValid(databaseid) ||
3592  proc->databaseId == databaseid)
3593  count++;
3594  }
3595 
3596  LWLockRelease(ProcArrayLock);
3597 
3598  return count;
3599 }
3600 
3601 /*
3602  * CancelDBBackends --- cancel backends that are using specified database
3603  */
3604 void
3605 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
3606 {
3607  ProcArrayStruct *arrayP = procArray;
3608  int index;
3609 
3610  /* tell all backends to die */
3611  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3612 
3613  for (index = 0; index < arrayP->numProcs; index++)
3614  {
3615  int pgprocno = arrayP->pgprocnos[index];
3616  PGPROC *proc = &allProcs[pgprocno];
3617 
3618  if (databaseid == InvalidOid || proc->databaseId == databaseid)
3619  {
3620  VirtualTransactionId procvxid;
3621  pid_t pid;
3622 
3623  GET_VXID_FROM_PGPROC(procvxid, *proc);
3624 
3625  proc->recoveryConflictPending = conflictPending;
3626  pid = proc->pid;
3627  if (pid != 0)
3628  {
3629  /*
3630  * Kill the pid if it's still here. If not, that's what we
3631  * wanted so ignore any errors.
3632  */
3633  (void) SendProcSignal(pid, sigmode, procvxid.backendId);
3634  }
3635  }
3636  }
3637 
3638  LWLockRelease(ProcArrayLock);
3639 }
3640 
3641 /*
3642  * CountUserBackends --- count backends that are used by specified user
3643  */
3644 int
3646 {
3647  ProcArrayStruct *arrayP = procArray;
3648  int count = 0;
3649  int index;
3650 
3651  LWLockAcquire(ProcArrayLock, LW_SHARED);
3652 
3653  for (index = 0; index < arrayP->numProcs; index++)
3654  {
3655  int pgprocno = arrayP->pgprocnos[index];
3656  PGPROC *proc = &allProcs[pgprocno];
3657 
3658  if (proc->pid == 0)
3659  continue; /* do not count prepared xacts */
3660  if (proc->isBackgroundWorker)
3661  continue; /* do not count background workers */
3662  if (proc->roleId == roleid)
3663  count++;
3664  }
3665 
3666  LWLockRelease(ProcArrayLock);
3667 
3668  return count;
3669 }
3670 
3671 /*
3672  * CountOtherDBBackends -- check for other backends running in the given DB
3673  *
3674  * If there are other backends in the DB, we will wait a maximum of 5 seconds
3675  * for them to exit. Autovacuum backends are encouraged to exit early by
3676  * sending them SIGTERM, but normal user backends are just waited for.
3677  *
3678  * The current backend is always ignored; it is caller's responsibility to
3679  * check whether the current backend uses the given DB, if it's important.
3680  *
3681  * Returns true if there are (still) other backends in the DB, false if not.
3682  * Also, *nbackends and *nprepared are set to the number of other backends
3683  * and prepared transactions in the DB, respectively.
3684  *
3685  * This function is used to interlock DROP DATABASE and related commands
3686  * against there being any active backends in the target DB --- dropping the
3687  * DB while active backends remain would be a Bad Thing. Note that we cannot
3688  * detect here the possibility of a newly-started backend that is trying to
3689  * connect to the doomed database, so additional interlocking is needed during
3690  * backend startup. The caller should normally hold an exclusive lock on the
3691  * target DB before calling this, which is one reason we mustn't wait
3692  * indefinitely.
3693  */
3694 bool
3695 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
3696 {
3697  ProcArrayStruct *arrayP = procArray;
3698 
3699 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
3700  int autovac_pids[MAXAUTOVACPIDS];
3701  int tries;
3702 
3703  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
3704  for (tries = 0; tries < 50; tries++)
3705  {
3706  int nautovacs = 0;
3707  bool found = false;
3708  int index;
3709 
3711 
3712  *nbackends = *nprepared = 0;
3713 
3714  LWLockAcquire(ProcArrayLock, LW_SHARED);
3715 
3716  for (index = 0; index < arrayP->numProcs; index++)
3717  {
3718  int pgprocno = arrayP->pgprocnos[index];
3719  PGPROC *proc = &allProcs[pgprocno];
3720  uint8 statusFlags = ProcGlobal->statusFlags[index];
3721 
3722  if (proc->databaseId != databaseId)
3723  continue;
3724  if (proc == MyProc)
3725  continue;
3726 
3727  found = true;
3728 
3729  if (proc->pid == 0)
3730  (*nprepared)++;
3731  else
3732  {
3733  (*nbackends)++;
3734  if ((statusFlags & PROC_IS_AUTOVACUUM) &&
3735  nautovacs < MAXAUTOVACPIDS)
3736  autovac_pids[nautovacs++] = proc->pid;
3737  }
3738  }
3739 
3740  LWLockRelease(ProcArrayLock);
3741 
3742  if (!found)
3743  return false; /* no conflicting backends, so done */
3744 
3745  /*
3746  * Send SIGTERM to any conflicting autovacuums before sleeping. We
3747  * postpone this step until after the loop because we don't want to
3748  * hold ProcArrayLock while issuing kill(). We have no idea what might
3749  * block kill() inside the kernel...
3750  */
3751  for (index = 0; index < nautovacs; index++)
3752  (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3753 
3754  /* sleep, then try again */
3755  pg_usleep(100 * 1000L); /* 100ms */
3756  }
3757 
3758  return true; /* timed out, still conflicts */
3759 }
3760 
3761 /*
3762  * Terminate existing connections to the specified database. This routine
3763  * is used by the DROP DATABASE command when user has asked to forcefully
3764  * drop the database.
3765  *
3766  * The current backend is always ignored; it is caller's responsibility to
3767  * check whether the current backend uses the given DB, if it's important.
3768  *
3769  * It doesn't allow to terminate the connections even if there is a one
3770  * backend with the prepared transaction in the target database.
3771  */
3772 void
3774 {
3775  ProcArrayStruct *arrayP = procArray;
3776  List *pids = NIL;
3777  int nprepared = 0;
3778  int i;
3779 
3780  LWLockAcquire(ProcArrayLock, LW_SHARED);
3781 
3782  for (i = 0; i < procArray->numProcs; i++)
3783  {
3784  int pgprocno = arrayP->pgprocnos[i];
3785  PGPROC *proc = &allProcs[pgprocno];
3786 
3787  if (proc->databaseId != databaseId)
3788  continue;
3789  if (proc == MyProc)
3790  continue;
3791 
3792  if (proc->pid != 0)
3793  pids = lappend_int(pids, proc->pid);
3794  else
3795  nprepared++;
3796  }
3797 
3798  LWLockRelease(ProcArrayLock);
3799 
3800  if (nprepared > 0)
3801  ereport(ERROR,
3802  (errcode(ERRCODE_OBJECT_IN_USE),
3803  errmsg("database \"%s\" is being used by prepared transactions",
3804  get_database_name(databaseId)),
3805  errdetail_plural("There is %d prepared transaction using the database.",
3806  "There are %d prepared transactions using the database.",
3807  nprepared,
3808  nprepared)));
3809 
3810  if (pids)
3811  {
3812  ListCell *lc;
3813 
3814  /*
3815  * Check whether we have the necessary rights to terminate other
3816  * sessions. We don't terminate any session until we ensure that we
3817  * have rights on all the sessions to be terminated. These checks are
3818  * the same as we do in pg_terminate_backend.
3819  *
3820  * In this case we don't raise some warnings - like "PID %d is not a
3821  * PostgreSQL server process", because for us already finished session
3822  * is not a problem.
3823  */
3824  foreach(lc, pids)
3825  {
3826  int pid = lfirst_int(lc);
3827  PGPROC *proc = BackendPidGetProc(pid);
3828 
3829  if (proc != NULL)
3830  {
3831  /* Only allow superusers to signal superuser-owned backends. */
3832  if (superuser_arg(proc->roleId) && !superuser())
3833  ereport(ERROR,
3834  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3835  errmsg("must be a superuser to terminate superuser process")));
3836 
3837  /* Users can signal backends they have role membership in. */
3838  if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3839  !has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND))
3840  ereport(ERROR,
3841  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3842  errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
3843  }
3844  }
3845 
3846  /*
3847  * There's a race condition here: once we release the ProcArrayLock,
3848  * it's possible for the session to exit before we issue kill. That
3849  * race condition possibility seems too unlikely to worry about. See
3850  * pg_signal_backend.
3851  */
3852  foreach(lc, pids)
3853  {
3854  int pid = lfirst_int(lc);
3855  PGPROC *proc = BackendPidGetProc(pid);
3856 
3857  if (proc != NULL)
3858  {
3859  /*
3860  * If we have setsid(), signal the backend's whole process
3861  * group
3862  */
3863 #ifdef HAVE_SETSID
3864  (void) kill(-pid, SIGTERM);
3865 #else
3866  (void) kill(pid, SIGTERM);
3867 #endif
3868  }
3869  }
3870  }
3871 }
3872 
3873 /*
3874  * ProcArraySetReplicationSlotXmin
3875  *
3876  * Install limits to future computations of the xmin horizon to prevent vacuum
3877  * and HOT pruning from removing affected rows still needed by clients with
3878  * replication slots.
3879  */
3880 void
3882  bool already_locked)
3883 {
3884  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3885 
3886  if (!already_locked)
3887  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3888 
3890  procArray->replication_slot_catalog_xmin = catalog_xmin;
3891 
3892  if (!already_locked)
3893  LWLockRelease(ProcArrayLock);
3894 }
3895 
3896 /*
3897  * ProcArrayGetReplicationSlotXmin
3898  *
3899  * Return the current slot xmin limits. That's useful to be able to remove
3900  * data that's older than those limits.
3901  */
3902 void
3904  TransactionId *catalog_xmin)
3905 {
3906  LWLockAcquire(ProcArrayLock, LW_SHARED);
3907 
3908  if (xmin != NULL)
3910 
3911  if (catalog_xmin != NULL)
3912  *catalog_xmin = procArray->replication_slot_catalog_xmin;
3913 
3914  LWLockRelease(ProcArrayLock);
3915 }
3916 
3917 /*
3918  * XidCacheRemoveRunningXids
3919  *
3920  * Remove a bunch of TransactionIds from the list of known-running
3921  * subtransactions for my backend. Both the specified xid and those in
3922  * the xids[] array (of length nxids) are removed from the subxids cache.
3923  * latestXid must be the latest XID among the group.
3924  */
3925 void
3927  int nxids, const TransactionId *xids,
3928  TransactionId latestXid)
3929 {
3930  int i,
3931  j;
3932  XidCacheStatus *mysubxidstat;
3933 
3935 
3936  /*
3937  * We must hold ProcArrayLock exclusively in order to remove transactions
3938  * from the PGPROC array. (See src/backend/access/transam/README.) It's
3939  * possible this could be relaxed since we know this routine is only used
3940  * to abort subtransactions, but pending closer analysis we'd best be
3941  * conservative.
3942  *
3943  * Note that we do not have to be careful about memory ordering of our own
3944  * reads wrt. GetNewTransactionId() here - only this process can modify
3945  * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
3946  * careful about our own writes being well ordered.
3947  */
3948  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3949 
3950  mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
3951 
3952  /*
3953  * Under normal circumstances xid and xids[] will be in increasing order,
3954  * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3955  * behavior when removing a lot of xids.
3956  */
3957  for (i = nxids - 1; i >= 0; i--)
3958  {
3959  TransactionId anxid = xids[i];
3960 
3961  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3962  {
3963  if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3964  {
3966  pg_write_barrier();
3967  mysubxidstat->count--;
3969  break;
3970  }
3971  }
3972 
3973  /*
3974  * Ordinarily we should have found it, unless the cache has
3975  * overflowed. However it's also possible for this routine to be
3976  * invoked multiple times for the same subtransaction, in case of an
3977  * error during AbortSubTransaction. So instead of Assert, emit a
3978  * debug warning.
3979  */
3980  if (j < 0 && !MyProc->subxidStatus.overflowed)
3981  elog(WARNING, "did not find subXID %u in MyProc", anxid);
3982  }
3983 
3984  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3985  {
3986  if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3987  {
3989  pg_write_barrier();
3990  mysubxidstat->count--;
3992  break;
3993  }
3994  }
3995  /* Ordinarily we should have found it, unless the cache has overflowed */
3996  if (j < 0 && !MyProc->subxidStatus.overflowed)
3997  elog(WARNING, "did not find subXID %u in MyProc", xid);
3998 
3999  /* Also advance global latestCompletedXid while holding the lock */
4000  MaintainLatestCompletedXid(latestXid);
4001 
4002  /* ... and xactCompletionCount */
4004 
4005  LWLockRelease(ProcArrayLock);
4006 }
4007 
#ifdef XIDCACHE_DEBUG

/*
 * DisplayXidCache -- write the XID cache counters to stderr
 *
 * Emits a single line containing each xc_* counter, so the effectiveness
 * of the XID cache can be judged.  Compiled only when XIDCACHE_DEBUG is
 * defined.
 */
static void
DisplayXidCache(void)
{
	fprintf(stderr,
			"XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
			xc_by_recent_xmin, xc_by_known_xact, xc_by_my_xact,
			xc_by_latest_xid, xc_by_main_xid, xc_by_child_xid,
			xc_by_known_assigned, xc_no_overflow, xc_slow_answer);
}
#endif							/* XIDCACHE_DEBUG */
4029 
4030 /*
4031  * If rel != NULL, return test state appropriate for relation, otherwise
4032  * return state usable for all relations. The latter may consider XIDs as
4033  * not-yet-visible-to-everyone that a state for a specific relation would
4034  * already consider visible-to-everyone.
4035  *
4036  * This needs to be called while a snapshot is active or registered, otherwise
4037  * there are wraparound and other dangers.
4038  *
4039  * See comment for GlobalVisState for details.
 *
 * NOTE(review): this listing has lost lines in this function -- the return
 * type and signature ahead of the opening brace, and the statement in each
 * switch case that should assign 'state'.  As shown, 'state' is still NULL
 * when the final Assert dereferences it.  Restore the missing lines from
 * the upstream file before building.
4040  */
4043 {
4044  GlobalVisState *state = NULL;
4045 
4046  /* XXX: we should assert that a snapshot is pushed or registered */
4047  Assert(RecentXmin);
4048 
 /* pick the state matching the horizon kind reported for rel */
4049  switch (GlobalVisHorizonKindForRel(rel))
4050  {
4051  case VISHORIZON_SHARED:
4053  break;
4054  case VISHORIZON_CATALOG:
4056  break;
4057  case VISHORIZON_DATA:
4059  break;
4060  case VISHORIZON_TEMP:
4062  break;
4063  }
4064 
 /* both boundaries of the chosen state are expected to be valid */
4065  Assert(FullTransactionIdIsValid(state->definitely_needed) &&
4066  FullTransactionIdIsValid(state->maybe_needed));
4067 
4068  return state;
4069 }
4070 
4071 /*
4072  * Return true if it's worth updating the accurate maybe_needed boundary.
4073  *
4074  * As it is somewhat expensive to determine xmin horizons, we don't want to
4075  * repeatedly do so when there is a low likelihood of it being beneficial.
4076  *
4077  * The current heuristic is that we update only if RecentXmin has changed
4078  * since the last update. If the oldest currently running transaction has not
4079  * finished, it is unlikely that recomputing the horizon would be useful.
4080  */
4081 static bool
      /* NOTE(review): the function name/parameter line (listing line 4082) is missing from this listing. */
4083 {
4084  /* hasn't been updated yet */
      /* NOTE(review): the condition guarding this return (listing line 4085) is missing from this listing. */
4086  return true;
4087 
4088  /*
4089  * If the maybe_needed/definitely_needed boundaries are the same, it's
4090  * unlikely to be beneficial to refresh boundaries.
4091  */
4092  if (FullTransactionIdFollowsOrEquals(state->maybe_needed,
4093  state->definitely_needed))
4094  return false;
4095 
4096  /* does the last snapshot built have a different xmin? */
      /* NOTE(review): the final return statement (listing line 4097) is missing from this listing. */
4098 }
4099 
4100 static void
      /* NOTE(review): the function name/parameter line (listing line 4101) is missing from this listing. */
4102 {
      /*
       * NOTE(review): only the last argument line of four statements is
       * visible below (shared, catalog, data and temp horizons, in that
       * order). The statement heads (listing lines 4103-4104, 4106-4107,
       * 4109-4110, 4112-4113) are missing from this listing.
       */
4105  horizons->shared_oldest_nonremovable);
4108  horizons->catalog_oldest_nonremovable);
4111  horizons->data_oldest_nonremovable);
4114  horizons->temp_oldest_nonremovable);
4115 
4116  /*
4117  * In longer running transactions it's possible that transactions we
4118  * previously needed to treat as running aren't around anymore. So update
4119  * definitely_needed to not be earlier than maybe_needed.
4120  */
      /*
       * NOTE(review): the code implementing the comment above (listing lines
       * 4121-4130) and the statement at listing line 4132 are missing from
       * this listing.
       */
4131 
4133 }
4134 
4135 /*
4136  * Update boundaries in GlobalVis{Shared,Catalog,Data,Temp}Rels
4137  * using ComputeXidHorizons().
4138  */
4139 static void
      /* NOTE(review): the function name/parameter line (listing line 4140) is missing from this listing. */
4141 {
      /*
       * The result struct is only needed as an argument; nothing in this
       * function reads it after the call.
       */
4142  ComputeXidHorizonsResult horizons;
4143 
4144  /* updates the horizons as a side-effect */
4145  ComputeXidHorizons(&horizons);
4146 }
4147 
4148 /*
4149  * Return true if no snapshot still considers fxid to be running.
4150  *
4151  * The state passed needs to have been initialized for the relation fxid is
4152  * from (NULL is also OK), otherwise the result may not be correct.
4153  *
4154  * See comment for GlobalVisState for details.
4155  */
4156 bool
      /* NOTE(review): the line with the function name and first parameter (listing line 4157) is missing from this listing. */
4158  FullTransactionId fxid)
4159 {
4160  /*
4161  * If fxid is older than maybe_needed bound, it definitely is visible to
4162  * everyone.
4163  */
4164  if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
4165  return true;
4166 
4167  /*
4168  * If fxid is >= definitely_needed bound, it is very likely to still be
4169  * considered running.
4170  */
4171  if (FullTransactionIdFollowsOrEquals(fxid, state->definitely_needed))
4172  return false;
4173 
4174  /*
4175  * fxid is between maybe_needed and definitely_needed, i.e. there might or
4176  * might not exist a snapshot considering fxid running. If it makes sense,
4177  * update boundaries and recheck.
4178  */
      /* NOTE(review): the condition opening this branch (listing line 4179) is missing from this listing. */
4180  {
4181  GlobalVisUpdate();
4182 
      /* the refresh is expected to leave fxid below definitely_needed */
4183  Assert(FullTransactionIdPrecedes(fxid, state->definitely_needed));
4184 
      /* with refreshed bounds, maybe_needed alone decides the answer */
4185  return FullTransactionIdPrecedes(fxid, state->maybe_needed);
4186  }
4187  else
      /* no refresh was done, so answer conservatively: not removable */
4188  return false;
4189 }
4190 
4191 /*
4192  * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
4193  *
4194  * It is crucial that this only gets called for xids from a source that
4195  * protects against xid wraparounds (e.g. from a table and thus protected by
4196  * relfrozenxid).
4197  */
4198 bool
      /* NOTE(review): the function name/parameter line (listing line 4199) is missing from this listing. */
4200 {
4201  FullTransactionId fxid;
4202 
4203  /*
4204  * Convert 32 bit argument to FullTransactionId. We can do so safely
4205  * because we know the xid has to, at the very least, be between
4206  * [oldestXid, nextFullXid), i.e. within 2 billion of xid. To avoid taking
4207  * a lock to determine either, we can just compare with
4208  * state->definitely_needed, which was based on those values at the time
4209  * the current snapshot was built.
4210  */
4211  fxid = FullXidRelativeTo(state->definitely_needed, xid);
4212 
      /* all remaining logic lives in the 64-bit variant */
4213  return GlobalVisTestIsRemovableFullXid(state, fxid);
4214 }
4215 
4216 /*
4217  * Return FullTransactionId below which all transactions are not considered
4218  * running anymore.
4219  *
4220  * Note: This is less efficient than testing with
4221  * GlobalVisTestIsRemovableFullXid as it likely requires building an accurate
4222  * cutoff, even in the case all the XIDs compared with the cutoff are outside
4223  * [maybe_needed, definitely_needed).
4224  */
4227 {
      /*
       * NOTE(review): the return-type and name/parameter lines (listing lines
       * 4225-4226) are missing from this listing.
       */
4228  /* acquire accurate horizon if not already done */
      /* NOTE(review): the condition guarding this call (listing line 4229) is missing from this listing. */
4230  GlobalVisUpdate();
4231 
      /* maybe_needed is the bound handed back to the caller */
4232  return state->maybe_needed;
4233 }
4234 
4235 /* Convenience wrapper around GlobalVisTestNonRemovableFullHorizon */
4238 {
      /*
       * NOTE(review): the return-type and name/parameter lines (listing lines
       * 4236-4237) are missing from this listing.
       */
4239  FullTransactionId cutoff;
4240 
      /*
       * NOTE(review): the assignment to cutoff (listing line 4241) is missing
       * from this listing; per the comment above this function it presumably
       * comes from GlobalVisTestNonRemovableFullHorizon -- confirm.
       */
4242 
      /* hand back only the 32-bit xid part of the 64-bit cutoff */
4243  return XidFromFullTransactionId(cutoff);
4244 }
4245 
4246 /*
4247  * Convenience wrapper around GlobalVisTestFor() and
4248  * GlobalVisTestIsRemovableFullXid(), see their comments.
4249  */
4250 bool
      /* NOTE(review): the function name/parameter line (listing line 4251) is missing from this listing. */
4252 {
      /* NOTE(review): the declaration of "state" (listing line 4253) is missing from this listing. */
4254 
      /* look up the state for rel, then test fxid against it */
4255  state = GlobalVisTestFor(rel);
4256 
4257  return GlobalVisTestIsRemovableFullXid(state, fxid);
4258 }
4259 
4260 /*
4261  * Convenience wrapper around GlobalVisTestFor() and
4262  * GlobalVisTestIsRemovableXid(), see their comments.
4263  */
4264 bool
      /* NOTE(review): the function name/parameter line (listing line 4265) is missing from this listing. */
4266 {
      /* NOTE(review): the declaration of "state" (listing line 4267) is missing from this listing. */
4268 
      /* look up the state for rel, then test the 32-bit xid against it */
4269  state = GlobalVisTestFor(rel);
4270 
4271  return GlobalVisTestIsRemovableXid(state, xid);
4272 }
4273 
4274 /*
4275  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
4276  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
4277  *
4278  * Be very careful about when to use this function. It can only safely be used
4279  * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
4280  * rel. That e.g. can be guaranteed if the caller assures a snapshot is
4281  * held by the backend and xid is from a table (where vacuum/freezing ensures
4282  * the xid has to be within that range), or if xid is from the procarray and
4283  * prevents xid wraparound that way.
4284  */
4285 static inline FullTransactionId
      /* NOTE(review): the function name/parameter line (listing line 4286) is missing from this listing. */
4287 {
      /* 32-bit xid part of the reference point */
4288  TransactionId rel_xid = XidFromFullTransactionId(rel);
4289 
      /* NOTE(review): a statement at listing line 4290 is missing from this listing. */
4291  Assert(TransactionIdIsValid(rel_xid));
4292 
4293  /* not guaranteed to find issues, but likely to catch mistakes */
      /* NOTE(review): the check announced above (listing line 4294) is missing from this listing. */
4295 
      /*
       * NOTE(review): the head of the return expression (listing line 4296) is
       * missing. The visible tail adds the difference between xid and rel_xid
       * after casting it to int32, i.e. as a signed offset.
       */
4297  + (int32) (xid - rel_xid));
4298 }
4299 
4300 
4301 /* ----------------------------------------------
4302  * KnownAssignedTransactionIds sub-module
4303  * ----------------------------------------------
4304  */
4305 
4306 /*
4307  * In Hot Standby mode, we maintain a list of transactions that are (or were)
4308  * running on the primary at the current point in WAL. These XIDs must be
4309  * treated as running by standby transactions, even though they are not in
4310  * the standby server's PGPROC array.
4311  *
4312  * We record all XIDs that we know have been assigned. That includes all the
4313  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
4314  * been assigned. We can deduce the existence of unobserved XIDs because we
4315  * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
4316  * list expands as new XIDs are observed or inferred, and contracts when
4317  * transaction completion records arrive.
4318  *
4319  * During hot standby we do not fret too much about the distinction between
4320  * top-level XIDs and subtransaction XIDs. We store both together in the
4321  * KnownAssignedXids list. In backends, this is copied into snapshots in
4322  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
4323  * doesn't care about the distinction either. Subtransaction XIDs are
4324  * effectively treated as top-level XIDs and in the typical case pg_subtrans
4325  * links are *not* maintained (which does not affect visibility).
4326  *
4327  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
4328  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
4329  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
4330  * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these
4331  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
4332  * and then remove them from KnownAssignedXids. This prevents overflow of
4333  * KnownAssignedXids and snapshots, at the cost that status checks for these
4334  * subXIDs will take a slower path through TransactionIdIsInProgress().
4335  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
4336  * though it should be complete for top-level XIDs; this is the same situation
4337  * that holds with respect to the PGPROC entries in normal running.
4338  *
4339  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
4340  * that, similarly to tracking overflow of a PGPROC's subxids array. We do
4341  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
4342  * As long as that is within the range of interesting XIDs, we have to assume
4343  * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
4344  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
4345  * subXID arrives - that is not an error.)
4346  *
4347  * Should a backend on primary somehow disappear before it can write an abort
4348  * record, then we just leave those XIDs in KnownAssignedXids. They actually
4349  * aborted but we think they were running; the distinction is irrelevant
4350  * because either way any changes done by the transaction are not visible to
4351  * backends in the standby. We prune KnownAssignedXids when
4352  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
4353  * array due to such dead XIDs.
4354  */
4355 
4356 /*
4357  * RecordKnownAssignedTransactionIds
4358  * Record the given XID in KnownAssignedXids, as well as any preceding
4359  * unobserved XIDs.
4360  *
4361  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
4362  * associated with a transaction. Must be called for each record after we
4363  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.
4364  *
4365  * Called during recovery in analogy with and in place of GetNewTransactionId()
4366  */
4367 void
      /* NOTE(review): the function name/parameter line (listing line 4368) is missing from this listing. */
4369 {
      /* NOTE(review): listing lines 4370-4372 are missing from this listing. */
4373 
4374  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
4375  xid, latestObservedXid);
4376 
4377  /*
4378  * When a newly observed xid arrives, it is frequently the case that it is
4379  * *not* the next xid in sequence. When this occurs, we must treat the
4380  * intervening xids as running also.
4381  */
      /* NOTE(review): the condition opening this block (listing line 4382) is missing from this listing. */
4383  {
4384  TransactionId next_expected_xid;
4385 
4386  /*
4387  * Extend subtrans like we do in GetNewTransactionId() during normal
4388  * operation using individual extend steps. Note that we do not need
4389  * to extend clog since its extensions are WAL logged.
4390  *
4391  * This part has to be done regardless of standbyState since we
4392  * immediately start assigning subtransactions to their toplevel
4393  * transactions.
4394  */
4395  next_expected_xid = latestObservedXid;
4396  while (TransactionIdPrecedes(next_expected_xid, xid))
4397  {
4398  TransactionIdAdvance(next_expected_xid);
4399  ExtendSUBTRANS(next_expected_xid);
4400  }
      /* the loop steps one xid at a time, so it must end exactly on xid */
4401  Assert(next_expected_xid == xid);
4402 
4403  /*
4404  * If the KnownAssignedXids machinery isn't up yet, there's nothing
4405  * more to do since we don't track assigned xids yet.
4406  */
      /* NOTE(review): the condition guarding this early return (listing line 4407) is missing from this listing. */
4408  {
4409  latestObservedXid = xid;
4410  return;
4411  }
4412 
4413  /*
4414  * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
4415  */
4416  next_expected_xid = latestObservedXid;
4417  TransactionIdAdvance(next_expected_xid);
      /* third argument false: no exclusive ProcArrayLock is held here */
4418  KnownAssignedXidsAdd(next_expected_xid, xid, false);
4419 
4420  /*
4421  * Now we can advance latestObservedXid
4422  */
4423  latestObservedXid = xid;
4424 
4425  /* ShmemVariableCache->nextXid must be beyond any observed xid */
      /* NOTE(review): the statement implementing the comment above (listing line 4426) is missing from this listing. */
4427  }
4428 }
4429 
4430 /*
4431  * ExpireTreeKnownAssignedTransactionIds
4432  * Remove the given XIDs from KnownAssignedXids.
4433  *
4434  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
4435  */
4436 void
      /* NOTE(review): the line with the function name and first parameters (listing line 4437) is missing from this listing. */
4438  TransactionId *subxids, TransactionId max_xid)
4439 {
      /* NOTE(review): a statement at listing line 4440 is missing from this listing. */
4441 
4442  /*
4443  * Uses same locking as transaction commit
4444  */
4445  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4446 
      /* drop the top-level xid and all nsubxids subxids in one call */
4447  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
4448 
4449  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
      /* NOTE(review): the statement for the comment above (listing line 4450) is missing from this listing. */
4451 
4452  /* ... and xactCompletionCount */
      /* NOTE(review): the statement for the comment above (listing line 4453) is missing from this listing. */
4454 
4455  LWLockRelease(ProcArrayLock);
4456 }
4457 
4458 /*
4459  * ExpireAllKnownAssignedTransactionIds
4460  * Remove all entries in KnownAssignedXids and reset lastOverflowedXid.
4461  */
4462 void
      /* NOTE(review): the function name/parameter line (listing line 4463) is missing from this listing. */
4464 {
4465  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
      /* NOTE(review): the statement that empties the array (listing line 4466) is missing from this listing. */
4467 
4468  /*
4469  * Reset lastOverflowedXid. Currently, lastOverflowedXid has no use after
4470  * the call of this function. But do this for unification with what
4471  * ExpireOldKnownAssignedTransactionIds() does.
4472  */
      /* NOTE(review): the reset statement itself (listing line 4473) is missing from this listing. */
4474  LWLockRelease(ProcArrayLock);
4475 }
4476 
4477 /*
4478  * ExpireOldKnownAssignedTransactionIds
4479  * Remove KnownAssignedXids entries preceding the given XID and
4480  * potentially reset lastOverflowedXid.
4481  */
4482 void
      /* NOTE(review): the function name/parameter line (listing line 4483) is missing from this listing. */
4484 {
4485  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4486 
4487  /*
4488  * Reset lastOverflowedXid if we know that all transactions that were
4489  * possibly running are gone. Not doing so could leave an incorrect
4490  * lastOverflowedXid value, which causes extra snapshots to be marked as
4491  * suboverflowed.
4492  */
      /*
       * NOTE(review): the conditional reset and the pruning call (listing
       * lines 4493-4495) are missing from this listing.
       */
4496  LWLockRelease(ProcArrayLock);
4497 }
4498 
4499 
4500 /*
4501  * Private module functions to manipulate KnownAssignedXids
4502  *
4503  * There are 5 main uses of the KnownAssignedXids data structure:
4504  *
4505  * * backends taking snapshots - all valid XIDs need to be copied out
4506  * * backends seeking to determine presence of a specific XID
4507  * * startup process adding new known-assigned XIDs
4508  * * startup process removing specific XIDs as transactions end
4509  * * startup process pruning array when special WAL records arrive
4510  *
4511  * This data structure is known to be a hot spot during Hot Standby, so we
4512  * go to some lengths to make these operations as efficient and as concurrent
4513  * as possible.
4514  *
4515  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
4516  * order, to be exact --- to allow binary search for specific XIDs. Note:
4517  * in general TransactionIdPrecedes would not provide a total order, but
4518  * we know that the entries present at any instant should not extend across
4519  * a large enough fraction of XID space to wrap around (the primary would
4520  * shut down for fear of XID wrap long before that happens). So it's OK to
4521  * use TransactionIdPrecedes as a binary-search comparator.
4522  *
4523  * It's cheap to maintain the sortedness during insertions, since new known
4524  * XIDs are always reported in XID order; we just append them at the right.
4525  *
4526  * To keep individual deletions cheap, we need to allow gaps in the array.
4527  * This is implemented by marking array elements as valid or invalid using
4528  * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
4529  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
4530  * XID entry itself. This preserves the property that the XID entries are
4531  * sorted, so we can do binary searches easily. Periodically we compress
4532  * out the unused entries; that's much cheaper than having to compress the
4533  * array immediately on every deletion.
4534  *
4535  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
4536  * are those with indexes tail <= i < head; items outside this subscript range
4537  * have unspecified contents. When head reaches the end of the array, we
4538  * force compression of unused entries rather than wrapping around, since
4539  * allowing wraparound would greatly complicate the search logic. We maintain
4540  * an explicit tail pointer so that pruning of old XIDs can be done without
4541  * immediately moving the array contents. In most cases only a small fraction
4542  * of the array contains valid entries at any instant.
4543  *
4544  * Although only the startup process can ever change the KnownAssignedXids
4545  * data structure, we still need interlocking so that standby backends will
4546  * not observe invalid intermediate states. The convention is that backends
4547  * must hold shared ProcArrayLock to examine the array. To remove XIDs from
4548  * the array, the startup process must hold ProcArrayLock exclusively, for
4549  * the usual transactional reasons (compare commit/abort of a transaction
4550  * during normal running). Compressing unused entries out of the array
4551  * likewise requires exclusive lock. To add XIDs to the array, we just insert
4552  * them into slots to the right of the head pointer and then advance the head
4553  * pointer. This wouldn't require any lock at all, except that on machines
4554  * with weak memory ordering we need to be careful that other processors
4555  * see the array element changes before they see the head pointer change.
4556  * We handle this by using a spinlock to protect reads and writes of the
4557  * head/tail pointers. (We could dispense with the spinlock if we were to
4558  * create suitable memory access barrier primitives and use those instead.)
4559  * The spinlock must be taken to read or write the head/tail pointers unless
4560  * the caller holds ProcArrayLock exclusively.
4561  *
4562  * Algorithmic analysis:
4563  *
4564  * If we have a maximum of M slots, with N XIDs currently spread across
4565  * S elements then we have N <= S <= M always.
4566  *
4567  * * Adding a new XID is O(1) and needs little locking (unless compression
4568  * must happen)
4569  * * Compressing the array is O(S) and requires exclusive lock
4570  * * Removing an XID is O(logS) and requires exclusive lock
4571  * * Taking a snapshot is O(S) and requires shared lock
4572  * * Checking for an XID is O(logS) and requires shared lock
4573  *
4574  * In comparison, using a hash table for KnownAssignedXids would mean that
4575  * taking snapshots would be O(M). If we can maintain S << M then the
4576  * sorted array technique will deliver significantly faster snapshots.
4577  * If we try to keep S too small then we will spend too much time compressing,
4578  * so there is an optimal point for any workload mix. We use a heuristic to
4579  * decide when to compress the array, though trimming also helps reduce
4580  * frequency of compressing. The heuristic requires us to track the number of
4581  * currently valid XIDs in the array.
4582  */
4583 
4584 
4585 /*
4586  * Compress KnownAssignedXids by shifting valid data down to the start of the
4587  * array, removing any gaps.
4588  *
4589  * A compression step is forced if "force" is true, otherwise we do it
4590  * only if a heuristic indicates it's a good time to do it.
4591  *
4592  * Caller must hold ProcArrayLock in exclusive mode.
4593  */
4594 static void
      /* NOTE(review): the function name/parameter line (listing line 4595) is missing from this listing. */
4596 {
4597  ProcArrayStruct *pArray = procArray;
4598  int head,
4599  tail;
4600  int compress_index;
4601  int i;
4602 
4603  /* no spinlock required since we hold ProcArrayLock exclusively */
4604  head = pArray->headKnownAssignedXids;
4605  tail = pArray->tailKnownAssignedXids;
4606 
4607  if (!force)
4608  {
4609  /*
4610  * If we can choose how much to compress, use a heuristic to avoid
4611  * compressing too often or not often enough.
4612  *
4613  * Heuristic is if we have a large enough current spread and less than
4614  * 50% of the elements are currently in use, then compress. This
4615  * should ensure we compress fairly infrequently. We could compress
4616  * less often though the virtual array would spread out more and
4617  * snapshots would become more expensive.
4618  */
      /* slots spanned by [tail, head), valid or not */
4619  int nelements = head - tail;
4620 
      /* skip when the span is small or at least half of it is still valid */
4621  if (nelements < 4 * PROCARRAY_MAXPROCS ||
4622  nelements < 2 * pArray->numKnownAssignedXids)
4623  return;
4624  }
4625 
4626  /*
4627  * We compress the array by reading the valid values from tail to head,
4628  * re-aligning data to 0th element.
4629  */
4630  compress_index = 0;
4631  for (i = tail; i < head; i++)
4632  {
      /*
       * NOTE(review): the condition for this block (listing line 4633) is
       * missing from this listing; per the comment above it presumably
       * tests whether slot i is valid -- confirm.
       */
4634  {
4635  KnownAssignedXids[compress_index] = KnownAssignedXids[i];
4636  KnownAssignedXidsValid[compress_index] = true;
4637  compress_index++;
4638  }
4639  }
4640 
      /* compacted data now occupies [0, compress_index) */
4641  pArray->tailKnownAssignedXids = 0;
4642  pArray->headKnownAssignedXids = compress_index;
4643 }
4644 
4645 /*
4646  * Add xids into KnownAssignedXids at the head of the array.
4647  *
4648  * xids from from_xid to to_xid, inclusive, are added to the array.
4649  *
4650  * If exclusive_lock is true then caller already holds ProcArrayLock in
4651  * exclusive mode, so we need no extra locking here. Else caller holds no
4652  * lock, so we need to be sure we maintain sufficient interlocks against
4653  * concurrent readers. (Only the startup process ever calls this, so no need
4654  * to worry about concurrent writers.)
4655  */
4656 static void
      /* NOTE(review): the line with the function name and first parameters (listing line 4657) is missing from this listing. */
4658  bool exclusive_lock)
4659 {
4660  ProcArrayStruct *pArray = procArray;
4661  TransactionId next_xid;
4662  int head,
4663  tail;
4664  int nxids;
4665  int i;
4666 
4667  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
4668 
4669  /*
4670  * Calculate how many array slots we'll need. Normally this is cheap; in
4671  * the unusual case where the XIDs cross the wrap point, we do it the hard
4672  * way.
4673  */
4674  if (to_xid >= from_xid)
4675  nxids = to_xid - from_xid + 1;
4676  else
4677  {
      /* count by stepping with TransactionIdAdvance until to_xid is reached */
4678  nxids = 1;
4679  next_xid = from_xid;
4680  while (TransactionIdPrecedes(next_xid, to_xid))
4681  {
4682  nxids++;
4683  TransactionIdAdvance(next_xid);
4684  }
4685  }
4686 
4687  /*
4688  * Since only the startup process modifies the head/tail pointers, we
4689  * don't need a lock to read them here.
4690  */
4691  head = pArray->headKnownAssignedXids;
4692  tail = pArray->tailKnownAssignedXids;
4693 
      /* head may equal the array size (array full); tail must index a slot */
4694  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
4695  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
4696 
4697  /*
4698  * Verify that insertions occur in TransactionId sequence. Note that even
4699  * if the last existing element is marked invalid, it must still have a
4700  * correctly sequenced XID value.
4701  */
4702  if (head > tail &&
4703  TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
4704  {
      /* NOTE(review): a statement at listing line 4705 is missing from this listing. */
4706  elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
4707  }
4708 
4709  /*
4710  * If our xids won't fit in the remaining space, compress out free space
4711  */
4712  if (head + nxids > pArray->maxKnownAssignedXids)
4713  {
4714  /* must hold lock to compress */
4715  if (!exclusive_lock)
4716  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4717 
      /* NOTE(review): the compression call itself (listing line 4718) is missing from this listing. */
4719 
      /* re-read head: the compression step is expected to have moved it */
4720  head = pArray->headKnownAssignedXids;
4721  /* note: we no longer care about the tail pointer */
4722 
4723  if (!exclusive_lock)
4724  LWLockRelease(ProcArrayLock);
4725 
4726  /*
4727  * If it still won't fit then we're out of memory
4728  */
4729  if (head + nxids > pArray->maxKnownAssignedXids)
4730  elog(ERROR, "too many KnownAssignedXids");
4731  }
4732 
4733  /* Now we can insert the xids into the space starting at head */
4734  next_xid = from_xid;
4735  for (i = 0; i < nxids; i++)
4736  {
4737  KnownAssignedXids[head] = next_xid;
4738  KnownAssignedXidsValid[head] = true;
4739  TransactionIdAdvance(next_xid);
4740  head++;
4741  }
4742 
4743  /* Adjust count of number of valid entries */
4744  pArray->numKnownAssignedXids += nxids;
4745 
4746  /*
4747  * Now update the head pointer. We use a spinlock to protect this
4748  * pointer, not because the update is likely to be non-atomic, but to
4749  * ensure that other processors see the above array updates before they
4750  * see the head pointer change.
4751  *
4752  * If we're holding ProcArrayLock exclusively, there's no need to take the
4753  * spinlock.
4754  */
4755  if (exclusive_lock)
4756  pArray->headKnownAssignedXids = head;
4757  else
4758  {
      /*
       * NOTE(review): the spinlock acquire and release around this store
       * (listing lines 4759 and 4761) are missing from this listing.
       */
4760  pArray->headKnownAssignedXids = head;
4762  }
4763 }
4764 
4765 /*
4766  * KnownAssignedXidsSearch
4767  *
4768  * Searches KnownAssignedXids for a specific xid and optionally removes it.
4769  * Returns true if it was found, false if not.
4770  *
4771  * Caller must hold ProcArrayLock in shared or exclusive mode.
4772  * Exclusive lock must be held for remove = true.
4773  */
4774 static bool
      /* NOTE(review): the function name/parameter line (listing line 4775) is missing from this listing. */
4776 {
4777  ProcArrayStruct *pArray = procArray;
4778  int first,
4779  last;
4780  int head;
4781  int tail;
      /* -1 means "not found"; set to the matching slot by the search below */
4782  int result_index = -1;
4783 
4784  if (remove)
4785  {
4786  /* we hold ProcArrayLock exclusively, so no need for spinlock */
4787  tail = pArray->tailKnownAssignedXids;
4788  head = pArray->headKnownAssignedXids;
4789  }
4790  else
4791  {
4792  /* take spinlock to ensure we see up-to-date array contents */
      /*
       * NOTE(review): the spinlock acquire and release around these reads
       * (listing lines 4793 and 4796) are missing from this listing.
       */
4794  tail = pArray->tailKnownAssignedXids;
4795  head = pArray->headKnownAssignedXids;
4797  }
4798 
4799  /*
4800  * Standard binary search. Note we can ignore the KnownAssignedXidsValid
4801  * array here, since even invalid entries will contain sorted XIDs.
4802  */
4803  first = tail;
4804  last = head - 1;
4805  while (first <= last)
4806  {
4807  int mid_index;
4808  TransactionId mid_xid;
4809 
4810  mid_index = (first + last) / 2;
4811  mid_xid = KnownAssignedXids[mid_index];
4812 
4813  if (xid == mid_xid)
4814  {
4815  result_index = mid_index;
4816  break;
4817  }
4818  else if (TransactionIdPrecedes(xid, mid_xid))
4819  last = mid_index - 1;
4820  else
4821  first = mid_index + 1;
4822  }
4823 
4824  if (result_index < 0)
4825  return false; /* not in array */
4826 
4827  if (!KnownAssignedXidsValid[result_index])
4828  return false; /* in array, but invalid */
4829 
4830  if (remove)
4831  {
      /* removal only clears the valid flag; the xid value stays in place */
4832  KnownAssignedXidsValid[result_index] = false;
4833 
4834  pArray->numKnownAssignedXids--;
4835  Assert(pArray->numKnownAssignedXids >= 0);
4836 
4837  /*
4838  * If we're removing the tail element then advance tail pointer over
4839  * any invalid elements. This will speed future searches.
4840  */
4841  if (result_index == tail)
4842  {
4843  tail++;
4844  while (tail < head && !KnownAssignedXidsValid[tail])
4845  tail++;
4846  if (tail >= head)
4847  {
4848  /* Array is empty, so we can reset both pointers */
4849  pArray->headKnownAssignedXids = 0;
4850  pArray->tailKnownAssignedXids = 0;
4851  }
4852  else
4853  {
4854  pArray->tailKnownAssignedXids = tail;
4855  }
4856  }
4857  }
4858 
4859  return true;
4860 }
4861 
4862 /*
4863  * Is the specified XID present in KnownAssignedXids[]?
4864  *
4865  * Caller must hold ProcArrayLock in shared or exclusive mode.
4866  */
4867 static bool
      /* NOTE(review): the function name/parameter line (listing line 4868) is missing from this listing. */
4869 {
      /* NOTE(review): a statement at listing line 4870 is missing from this listing. */
4871 
      /* remove = false: pure lookup, the array is left untouched */
4872  return KnownAssignedXidsSearch(xid, false);
4873 }
4874 
4875 /*
4876  * Remove the specified XID from KnownAssignedXids[].
4877  *
4878  * Caller must hold ProcArrayLock in exclusive mode.
4879  */
4880 static void
      /* NOTE(review): the function name/parameter line (listing line 4881) is missing from this listing. */
4882 {
      /* NOTE(review): a statement at listing line 4883 is missing from this listing. */
4884 
4885  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
4886 
4887  /*
4888  * Note: we cannot consider it an error to remove an XID that's not
4889  * present. We intentionally remove subxact IDs while processing
4890  * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
4891  * removed again when the top-level xact commits or aborts.
4892  *
4893  * It might be possible to track such XIDs to distinguish this case from
4894  * actual errors, but it would be complicated and probably not worth it.
4895  * So, just ignore the search result.
4896  */
      /* remove = true: the entry, if found and valid, is marked invalid */
4897  (void) KnownAssignedXidsSearch(xid, true);
4898 }
4899 
4900 /*
4901  * KnownAssignedXidsRemoveTree
4902  * Remove xid (if it's not InvalidTransactionId) and all the subxids.
4903  *
4904  * Caller must hold ProcArrayLock in exclusive mode.
4905  */
4906 static void
      /* NOTE(review): the line with the function name and first parameters (listing line 4907) is missing from this listing. */
4908  TransactionId *subxids)
4909 {
4910  int i;
4911 
4912  if (TransactionIdIsValid(xid))
      /*
       * NOTE(review): the statement controlled by the "if" above (listing
       * line 4913) is missing from this listing. As printed, the "for" loop
       * below would become the body of the "if", which contradicts the
       * header comment.
       */
4914 
      /* remove each of the nsubxids subtransaction ids individually */
4915  for (i = 0; i < nsubxids; i++)
4916  KnownAssignedXidsRemove(subxids[i]);
4917 
4918  /* Opportunistically compress the array */
      /* NOTE(review): the compression call (listing line 4919) is missing from this listing. */
4920 }
4921 
4922 /*
4923  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
4924  * then clear the whole table.
4925  *
4926  * Caller must hold ProcArrayLock in exclusive mode.
4927  */
4928 static void
      /* NOTE(review): the function name/parameter line (listing line 4929) is missing from this listing. */
4930 {
4931  ProcArrayStruct *pArray = procArray;
      /* number of entries invalidated by this call */
4932  int count = 0;
4933  int head,
4934  tail,
4935  i;
4936 
      /* invalid removeXid means "clear everything": just reset the counters */
4937  if (!TransactionIdIsValid(removeXid))
4938  {
4939  elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
4940  pArray->numKnownAssignedXids = 0;
4941  pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
4942  return;
4943  }
4944 
4945  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
4946 
4947  /*
4948  * Mark entries invalid starting at the tail. Since array is sorted, we
4949  * can stop as soon as we reach an entry >= removeXid.
4950  */
4951  tail = pArray->tailKnownAssignedXids;
4952  head = pArray->headKnownAssignedXids;
4953 
4954  for (i = tail; i < head; i++)
4955  {
      /* NOTE(review): the condition for this block (listing line 4956) is missing from this listing. */
4957  {
4958  TransactionId knownXid = KnownAssignedXids[i];
4959 
4960  if (TransactionIdFollowsOrEquals(knownXid, removeXid))
4961  break;
4962 
      /* xids for which StandbyTransactionIdIsPrepared() is true are kept */
4963  if (!StandbyTransactionIdIsPrepared(knownXid))
4964  {
4965  KnownAssignedXidsValid[i] = false;
4966  count++;
4967  }
4968  }
4969  }
4970 
4971  pArray->numKnownAssignedXids -= count;
4972  Assert(pArray->numKnownAssignedXids >= 0);
4973 
4974  /*
4975  * Advance the tail pointer if we've marked the tail item invalid.
4976  */
4977  for (i = tail; i < head; i++)
4978  {
      /* NOTE(review): the condition guarding this break (listing line 4979) is missing from this listing. */
4980  break;
4981  }
4982  if (i >= head)
4983  {
4984  /* Array is empty, so we can reset both pointers */
4985  pArray->headKnownAssignedXids = 0;
4986  pArray->tailKnownAssignedXids = 0;
4987  }
4988  else
4989  {
4990  pArray->tailKnownAssignedXids = i;
4991  }
4992 
4993  /* Opportunistically compress the array */
      /* NOTE(review): the compression call (listing line 4994) is missing from this listing. */
4995 }
4996 
4997 /*
4998  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
4999  * We filter out anything >= xmax.
5000  *
5001  * Returns the number of XIDs stored into xarray[]. Caller is responsible
5002  * that array is large enough.
5003  *
5004  * Caller must hold ProcArrayLock in (at least) shared mode.
5005  */
5006 static int
      /* NOTE(review): the function name/parameter line (listing line 5007) is missing from this listing. */
5008 {
      /*
       * NOTE(review): the declaration of xtmp (listing line 5009) is missing
       * from this listing. It only serves as the xmin argument below and is
       * not used afterwards in this function.
       */
5010 
5011  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
5012 }
5013 
5014 /*
5015  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
5016  * we reduce *xmin to the lowest xid value seen if not already lower.
5017  *
5018  * Caller must hold ProcArrayLock in (at least) shared mode.
5019  */
5020 static int
      /* NOTE(review): the line with the function name and first parameters (listing line 5021) is missing from this listing. */
5022  TransactionId xmax)
5023 {
      /* number of xids written to xarray so far; also the return value */
5024  int count = 0;
5025  int head,
5026  tail;
5027  int i;
5028 
5029  /*
5030  * Fetch head just once, since it may change while we loop. We can stop
5031  * once we reach the initially seen head, since we are certain that an xid
5032  * cannot enter and then leave the array while we hold ProcArrayLock. We
5033  * might miss newly-added xids, but they should be >= xmax so irrelevant
5034  * anyway.
5035  *
5036  * Must take spinlock to ensure we see up-to-date array contents.
5037  */
      /*
       * NOTE(review): the statements that read tail and head, presumably
       * under the spinlock (listing lines 5038-5041), are missing from this
       * listing.
       */
5042 
5043  for (i = tail; i < head; i++)
5044  {
5045  /* Skip any gaps in the array */
      /* NOTE(review): the condition for this block (listing line 5046) is missing from this listing. */
5047  {
5048  TransactionId knownXid = KnownAssignedXids[i];
5049 
5050  /*
5051  * Update xmin if required. Only the first XID need be checked,
5052  * since the array is sorted.
5053  */
5054  if (count == 0 &&
5055  TransactionIdPrecedes(knownXid, *xmin))
5056  *xmin = knownXid;
5057 
5058  /*
5059  * Filter out anything >= xmax, again relying on sorted property
5060  * of array.
5061  */
      /* an invalid xmax disables the filter */
5062  if (TransactionIdIsValid(xmax) &&
5063  TransactionIdFollowsOrEquals(knownXid, xmax))
5064  break;
5065 
5066  /* Add knownXid into output array */
5067  xarray[count++] = knownXid;
5068  }
5069  }
5070 
5071  return count;
5072 }
5073 
5074 /*
5075  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
5076  * if nothing there.
5077  */
5078 static TransactionId
      /* NOTE(review): the function name/parameter line (listing line 5079) is missing from this listing. */
5080 {
5081  int head,
5082  tail;
5083  int i;
5084 
5085  /*
5086  * Fetch head just once, since it may change while we loop.
5087  */
      /*
       * NOTE(review): the statements that read tail and head (listing lines
       * 5088-5091) are missing from this listing.
       */
5092 
5093  for (i = tail; i < head; i++)
5094  {
5095  /* Skip any gaps in the array */
      /* NOTE(review): the condition guarding this return (listing line 5096) is missing from this listing. */
5097  return KnownAssignedXids[i];
5098  }
5099 
      /* loop ended without returning an entry */
5100  return InvalidTransactionId;
5101 }
5102 
5103 /*
5104  * Display KnownAssignedXids to provide debug trail
5105  *
5106  * Currently this is only called within startup process, so we need no
5107  * special locking.
5108  *
5109  * Note this is pretty expensive, and much of the expense will be incurred
5110  * even if the elog message will get discarded. It's not currently called
5111  * in any performance-critical places, however, so no need to be tenser.
5112  */
5113 static void
      /* NOTE(review): the function name/parameter line (listing line 5114) is missing from this listing. */
5115 {
5116  ProcArrayStruct *pArray = procArray;
      /* NOTE(review): the declaration of buf (listing line 5117) is missing from this listing. */
5118  int head,
5119  tail,
5120  i;
      /* entries actually printed; logged next to numKnownAssignedXids */
5121  int nxids = 0;
5122 
5123  tail = pArray->tailKnownAssignedXids;
5124  head = pArray->headKnownAssignedXids;
5125 
5126  initStringInfo(&buf);
5127 
5128  for (i = tail; i < head; i++)
5129  {
      /* NOTE(review): the condition for this block (listing line 5130) is missing from this listing. */
5131  {
5132  nxids++;
5133  appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
5134  }
5135  }
5136 
5137  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
5138  nxids,
5139  pArray->numKnownAssignedXids,
5140  pArray->tailKnownAssignedXids,
5141  pArray->headKnownAssignedXids,
5142  buf.data);
5143 
      /* release the buffer set up by initStringInfo() above */
5144  pfree(buf.data);
5145 }
5146 
5147 /*
5148  * KnownAssignedXidsReset
5149  * Resets KnownAssignedXids to be empty
5150  */
5151 static void
      /* NOTE(review): the function name/parameter line (listing line 5152) is missing from this listing. */
5153 {
5154  ProcArrayStruct *pArray = procArray;
5155 
5156  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
5157 
      /*
       * Only the count and the head/tail indexes are zeroed; the array
       * contents are not touched by this function.
       */
5158  pArray->numKnownAssignedXids = 0;
5159  pArray->tailKnownAssignedXids = 0;
5160  pArray->headKnownAssignedXids = 0;
5161 
5162  LWLockRelease(ProcArrayLock);
5163 }
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4843
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
#define pg_read_barrier()
Definition: atomics.h:158
#define pg_write_barrier()
Definition: atomics.h:159
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:292
#define InvalidBackendId
Definition: backendid.h:23
unsigned int uint32
Definition: c.h:441
signed char int8
Definition: c.h:427
#define likely(x)
Definition: c.h:272
#define offsetof(type, field)
Definition: c.h:727
signed int int32
Definition: c.h:429
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:350
#define unlikely(x)
Definition: c.h:273
unsigned char uint8
Definition: c.h:439
uint32 TransactionId
Definition: c.h:587
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
#define OidIsValid(objectId)
Definition: c.h:710
size_t Size
Definition: c.h:540
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:104
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2113
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1129
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
int trace_recovery(int trace_level)
Definition: elog.c:3425
#define LOG
Definition: elog.h:25
#define DEBUG3
Definition: elog.h:22
#define FATAL
Definition: elog.h:35
#define WARNING
Definition: elog.h:30
#define DEBUG1
Definition: elog.h:24
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
#define DEBUG4
Definition: elog.h:21
bool IsUnderPostmaster
Definition: globals.c:112
Oid MyDatabaseId
Definition: globals.c:88
#define malloc(a)
Definition: header.h:50
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend_int(List *list, int datum)
Definition: list.c:354
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:72
#define InvalidLocalTransactionId
Definition: lock.h:70
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:76
#define GET_VXID_FROM_PGPROC(vxid, proc)
Definition: lock.h:82
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1937
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1370
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1919
@ LW_SHARED
Definition: lwlock.h:105
@ LW_EXCLUSIVE
Definition: lwlock.h:104
void pfree(void *pointer)
Definition: mcxt.c:1169
void * palloc(Size size)
Definition: mcxt.c:1062
#define AmStartupProcess()
Definition: miscadmin.h:444
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:406
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
Oid GetUserId(void)
Definition: miscinit.c:495
#define NIL
Definition: pg_list.h:65
#define lfirst_int(lc)
Definition: pg_list.h:170
static char * buf
Definition: pg_test_fsync.c:70
#define fprintf
Definition: port.h:229
#define qsort(a, b, c, d)
Definition: port.h:495
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:58
#define NUM_AUXILIARY_PROCS
Definition: proc.h:377
#define PROC_IN_SAFE_IC
Definition: proc.h:56
#define INVALID_PGPROCNO
Definition: proc.h:83
#define PROC_IN_VACUUM
Definition: proc.h:55
#define PROC_COPYABLE_FLAGS
Definition: proc.h:69
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:61
#define PROC_IS_AUTOVACUUM
Definition: proc.h:54
static GlobalVisState GlobalVisDataRels
Definition: procarray.c:286
bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid)
Definition: procarray.c:4157
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:2005
static void GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
Definition: procarray.c:2089
#define TOTAL_MAX_CACHED_SUBXIDS
static GlobalVisState GlobalVisSharedRels
Definition: procarray.c:284
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3903
static GlobalVisState GlobalVisCatalogRels
Definition: procarray.c:285
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4199
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4251
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3442
Size ProcArrayShmemSize(void)
Definition: procarray.c:363
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2960
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3926
bool TransactionIdIsActive(TransactionId xid)
Definition: procarray.c:1591
static FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
Definition: procarray.c:4286
bool MinimumActiveBackends(int min)
Definition: procarray.c:3491
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:3773
#define xc_no_overflow_inc()
Definition: procarray.c:329
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:277
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4463
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:69
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3362
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2720
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2895
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:4907
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:5021
#define xc_by_recent_xmin_inc()
Definition: procarray.c:322
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:654
static void KnownAssignedXidsRemovePreceding(TransactionId xid)
Definition: procarray.c:4929
static PGPROC * allProcs
Definition: procarray.c:263
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4368
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:5007
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
static TransactionId latestObservedXid
Definition: procarray.c:270
static ProcArrayStruct * procArray
Definition: procarray.c:261
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:2080
int CountDBConnections(Oid databaseid)
Definition: procarray.c:3574
static GlobalVisState GlobalVisTempRels
Definition: procarray.c:287
#define xc_by_my_xact_inc()
Definition: procarray.c:324
#define xc_by_known_assigned_inc()
Definition: procarray.c:328
struct ProcArrayStruct ProcArrayStruct
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3605
#define PROCARRAY_MAXPROCS
void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:2047
static bool GlobalVisTestShouldUpdate(GlobalVisState *state)
Definition: procarray.c:4082
static void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:715
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:456
struct ComputeXidHorizonsResult ComputeXidHorizonsResult
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4237
static TransactionId * KnownAssignedXids
Definition: procarray.c:268
#define xc_by_child_xid_inc()
Definition: procarray.c:327
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3436
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2206
static bool * KnownAssignedXidsValid
Definition: procarray.c:269
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:4881
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
Definition: procarray.c:4101
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2069
static void KnownAssignedXidsCompress(bool force)
Definition: procarray.c:4595
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4042
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3544
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4265
#define MAXAUTOVACPIDS
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2648
void CreateSharedProcArray(void)
Definition: procarray.c:405
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:3100
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:5079
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1034
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:887
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:3274
int CountUserBackends(Oid roleid)
Definition: procarray.c:3645
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:3146
static TransactionId ComputeXidHorizonsResultLastXmin
Definition: procarray.c:294
static void GlobalVisUpdate(void)
Definition: procarray.c:4140
#define xc_slow_answer_inc()
Definition: procarray.c:330
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:5114
#define xc_by_main_xid_inc()
Definition: procarray.c:326
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:3169
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid)
Definition: procarray.c:969
static void ComputeXidHorizons(ComputeXidHorizonsResult *h)
Definition: procarray.c:1701
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:1275
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:4868
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:3695
GlobalVisHorizonKind
Definition: procarray.c:253
@ VISHORIZON_SHARED
Definition: procarray.c:254
@ VISHORIZON_DATA
Definition: procarray.c:256
@ VISHORIZON_CATALOG
Definition: procarray.c:255
@ VISHORIZON_TEMP
Definition: procarray.c:257
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:3206
#define xc_by_latest_xid_inc()
Definition: procarray.c:325
bool IsBackendPid(int pid)
Definition: procarray.c:3241
#define xc_by_known_xact_inc()
Definition: procarray.c:323
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:4775
static void KnownAssignedXidsReset(void)
Definition: procarray.c:5152
FullTransactionId GlobalVisTestNonRemovableFullHorizon(GlobalVisState *state)
Definition: procarray.c:4226
static GlobalVisHorizonKind GlobalVisHorizonKindForRel(Relation rel)
Definition: procarray.c:1971
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3881
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1003
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:552
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:3056
static void MaintainLatestCompletedXid(TransactionId latestXid)
Definition: procarray.c:947
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:773
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:4437
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2569
static bool GetSnapshotDataReuse(Snapshot snapshot)
Definition: procarray.c:2123
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:4657
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1359
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4483
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:261
ProcSignalReason
Definition: procsignal.h:31
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:622
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:658
int slock_t
Definition: s_lock.h:958
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
void pg_usleep(long microsec)
Definition: signal.c:53
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1873
TransactionId RecentXmin
Definition: snapmgr.c:113
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1650
TransactionId TransactionXmin
Definition: snapmgr.c:112
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:101
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:68
PROC_HDR * ProcGlobal
Definition: proc.c:80
int vacuum_defer_cleanup_age
Definition: standby.c:39
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1072
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
TransactionId slot_catalog_xmin
Definition: procarray.c:194
TransactionId data_oldest_nonremovable
Definition: procarray.c:239
TransactionId temp_oldest_nonremovable
Definition: procarray.c:245
TransactionId shared_oldest_nonremovable
Definition: procarray.c:216
TransactionId oldest_considered_running
Definition: procarray.c:207
TransactionId slot_xmin
Definition: procarray.c:193
FullTransactionId latest_completed
Definition: procarray.c:187
TransactionId catalog_oldest_nonremovable
Definition: procarray.c:233
TransactionId shared_oldest_nonremovable_raw
Definition: procarray.c:227
FullTransactionId definitely_needed
Definition: procarray.c:172
FullTransactionId maybe_needed
Definition: procarray.c:175
Definition: pg_list.h:51
Definition: proc.h:125
TransactionId xmin
Definition: proc.h:141
bool procArrayGroupMember
Definition: proc.h:219
LocalTransactionId lxid
Definition: proc.h:146
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:221
uint8 statusFlags
Definition: proc.h:192
bool recoveryConflictPending
Definition: proc.h:170
Oid databaseId
Definition: proc.h:157
bool delayChkpt
Definition: proc.h:190
BackendId backendId
Definition: proc.h:156
int pid
Definition: proc.h:149
bool isBackgroundWorker
Definition: proc.h:163
int pgxactoff
Definition: proc.h:151
XidCacheStatus subxidStatus
Definition: proc.h:213
LOCK * waitLock
Definition: proc.h:182
TransactionId xid
Definition: proc.h:136
int pgprocno
Definition: proc.h:153
struct XidCache subxids
Definition: proc.h:215
TransactionId procArrayGroupMemberXid
Definition: proc.h:227
PGSemaphore sem
Definition: proc.h:130
Oid roleId
Definition: proc.h:158
Definition: proc.h:319
uint8 * statusFlags
Definition: proc.h:336
XidCacheStatus * subxidStates
Definition: proc.h:330
PGPROC * allProcs
Definition: proc.h:321
TransactionId * xids
Definition: proc.h:324
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:349
TransactionId replication_slot_xmin
Definition: procarray.c:96
int maxKnownAssignedXids
Definition: procarray.c:80
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:98
slock_t known_assigned_xids_lck
Definition: procarray.c:84
int numKnownAssignedXids
Definition: procarray.c:81
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:101
TransactionId lastOverflowedXid
Definition: procarray.c:93
int tailKnownAssignedXids
Definition: procarray.c:82
int headKnownAssignedXids
Definition: procarray.c:83
Form_pg_class rd_rel
Definition: rel.h:109
TransactionId oldestRunningXid
Definition: standby.h:83
TransactionId nextXid
Definition: standby.h:82
TransactionId latestCompletedXid
Definition: standby.h:84
TransactionId * xids
Definition: standby.h:86
TransactionId xmin
Definition: snapshot.h:157
int32 subxcnt
Definition: snapshot.h:181
bool copied
Definition: snapshot.h:185
uint32 regd_count
Definition: snapshot.h:205
uint32 active_count
Definition: snapshot.h:204
CommandId curcid
Definition: snapshot.h:187
TimestampTz whenTaken
Definition: snapshot.h:208
uint32 xcnt
Definition: snapshot.h:169
TransactionId * subxip
Definition: snapshot.h:180
uint64 snapXactCompletionCount
Definition: snapshot.h:216
TransactionId xmax
Definition: snapshot.h:158
XLogRecPtr lsn
Definition: snapshot.h:209
TransactionId * xip
Definition: snapshot.h:168
bool suboverflowed
Definition: snapshot.h:182
bool takenDuringRecovery
Definition: snapshot.h:184
FullTransactionId nextXid
Definition: transam.h:220
uint64 xactCompletionCount
Definition: transam.h:248
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId latestCompletedXid
Definition: transam.h:238
LocalTransactionId localTransactionId
Definition: lock.h:67
BackendId backendId
Definition: lock.h:66
bool overflowed
Definition: proc.h:43
uint8 count
Definition: proc.h:41
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:48
Definition: type.h:90
Definition: regguts.h:318
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:74
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:308
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
bool superuser(void)
Definition: superuser.c:46
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:365
bool TransactionIdIsKnownCompleted(TransactionId transactionId)
Definition: transam.c:238
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
#define FullTransactionIdIsNormal(x)
Definition: transam.h:58
static TransactionId TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
Definition: transam.h:323
static FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
Definition: transam.h:361
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define InvalidTransactionId
Definition: transam.h:31
#define U64FromFullTransactionId(x)
Definition: transam.h:49
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition: transam.h:81
#define FullTransactionIdFollowsOrEquals(a, b)
Definition: transam.h:54
#define AssertTransactionIdInAllowableRange(xid)
Definition: transam.h:302