PostgreSQL Source Code  git master
procarray.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  * POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of PGPROC substructures, as well as associated
8  * arrays in ProcGlobal, for all active backends. Although there are several
9  * uses for this, the principal one is as a means of determining the set of
10  * currently running transactions.
11  *
12  * Because of various subtle race conditions it is critical that a backend
13  * hold the correct locks while setting or clearing its xid (in
14  * ProcGlobal->xids[]/MyProc->xid). See notes in
15  * src/backend/access/transam/README.
16  *
17  * The process arrays now also include structures representing prepared
18  * transactions. The xid and subxids fields of these are valid, as are the
19  * myProcLocks lists. They can be distinguished from regular backend PGPROCs
20  * at need by checking for pid == 0.
21  *
22  * During hot standby, we also keep a list of XIDs representing transactions
23  * that are known to be running on the primary (or more precisely, were running
24  * as of the current point in the WAL stream). This list is kept in the
25  * KnownAssignedXids array, and is updated by watching the sequence of
26  * arriving XIDs. This is necessary because if we leave those XIDs out of
27  * snapshots taken for standby queries, then they will appear to be already
28  * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
29  * array represents standby processes, which by definition are not running
30  * transactions that have XIDs.
31  *
32  * It is perhaps possible for a backend on the primary to terminate without
33  * writing an abort record for its transaction. While that shouldn't really
34  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
35  * ourselves by pruning the array when a valid list of running XIDs arrives.
36  *
37  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
38  * Portions Copyright (c) 1994, Regents of the University of California
39  *
40  *
41  * IDENTIFICATION
42  * src/backend/storage/ipc/procarray.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <signal.h>
49 
50 #include "access/clog.h"
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/twophase.h"
54 #include "access/xact.h"
55 #include "access/xlog.h"
56 #include "catalog/catalog.h"
57 #include "catalog/pg_authid.h"
58 #include "commands/dbcommands.h"
59 #include "miscadmin.h"
60 #include "pgstat.h"
61 #include "storage/proc.h"
62 #include "storage/procarray.h"
63 #include "storage/spin.h"
64 #include "utils/acl.h"
65 #include "utils/builtins.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 
/*
 * UINT32_ACCESS_ONCE(var): take the address of "var", read it through a
 * volatile-qualified uint32 pointer, and yield the loaded value as a uint32.
 * "var" must be an lvalue, since its address is taken.
 */
69 #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
70 
71 /* Our shared memory area */
72 typedef struct ProcArrayStruct
73 {
74  int numProcs; /* number of valid procs entries */
75  int maxProcs; /* allocated size of procs array */
76 
77  /*
78  * Known assigned XIDs handling
79  */
80  int maxKnownAssignedXids; /* allocated size of array */
81  int numKnownAssignedXids; /* current # of valid entries */
82  int tailKnownAssignedXids; /* index of oldest valid element */
83  int headKnownAssignedXids; /* index of newest element, + 1 */
84  slock_t known_assigned_xids_lck; /* protects head/tail pointers */
85 
86  /*
87  * Highest subxid that has been removed from KnownAssignedXids array to
88  * prevent overflow; or InvalidTransactionId if none. We track this for
89  * similar reasons to tracking overflowing cached subxids in PGPROC
90  * entries. Must hold exclusive ProcArrayLock to change this, and shared
91  * lock to read it.
92  */
94 
95  /* oldest xmin of any replication slot */
97  /* oldest catalog xmin of any replication slot */
99 
100  /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
103 
104 /*
105  * State for the GlobalVisTest* family of functions. Those functions can
106  * e.g. be used to decide if a deleted row can be removed without violating
107  * MVCC semantics: If the deleted row's xmax is not considered to be running
108  * by anyone, the row can be removed.
109  *
110  * To avoid slowing down GetSnapshotData(), we don't calculate a precise
111  * cutoff XID while building a snapshot (looking at the frequently changing
112  * xmins scales badly). Instead we compute two boundaries while building the
113  * snapshot:
114  *
115  * 1) definitely_needed, indicating that rows deleted by XIDs >=
116  * definitely_needed are definitely still visible.
117  *
118  * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
119  * definitely be removed.
120  *
121  * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
122  * && XID < definitely_needed), the boundaries can be recomputed (using
123  * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
124  * maintaining an accurate value all the time.
125  *
126  * As it is not cheap to compute accurate boundaries, we limit the number of
127  * times that happens in short succession. See GlobalVisTestShouldUpdate().
128  *
129  *
130  * There are three backend lifetime instances of this struct, optimized for
131  * different types of relations. As e.g. a normal user defined table in one
132  * database is inaccessible to backends connected to another database, a test
133  * specific to a relation can be more aggressive than a test for a shared
134  * relation. Currently we track three different states:
135  *
136  * 1) GlobalVisSharedRels, which only considers an XID's
137  * effects visible-to-everyone if neither snapshots in any database, nor a
138  * replication slot's xmin, nor a replication slot's catalog_xmin might
139  * still consider XID as running.
140  *
141  * 2) GlobalVisCatalogRels, which only considers an XID's
142  * effects visible-to-everyone if neither snapshots in the current
143  * database, nor a replication slot's xmin, nor a replication slot's
144  * catalog_xmin might still consider XID as running.
145  *
146  * I.e. the difference to GlobalVisSharedRels is that
147  * snapshots in other databases are ignored.
148  *
149  * 3) GlobalVisDataRels, which only considers an XID's
150  * effects visible-to-everyone if neither snapshots in the current
151  * database, nor a replication slot's xmin consider XID as running.
152  *
153  * I.e. the difference to GlobalVisCatalogRels is that
154  * replication slot's catalog_xmin is not taken into account.
155  *
156  * GlobalVisTestFor(relation) returns the appropriate state
157  * for the relation.
158  *
159  * The boundaries are FullTransactionIds instead of TransactionIds to avoid
160  * wraparound dangers. Otherwise there would e.g. be no procarray state to
161  * prevent maybe_needed from becoming old enough to wrap around after the
162  * GetSnapshotData() call.
163  *
164  * The typedef is in the header.
165  */
167 {
168  /* XIDs >= are considered running by some backend */
170 
171  /* XIDs < are not considered to be running by any backend */
173 };
174 
175 /*
176  * Result of ComputeXidHorizons().
177  */
179 {
180  /*
181  * The value of ShmemVariableCache->latestCompletedXid when
182  * ComputeXidHorizons() held ProcArrayLock.
183  */
185 
186  /*
187  * The same for procArray->replication_slot_xmin and
188  * procArray->replication_slot_catalog_xmin.
189  */
192 
193  /*
194  * Oldest xid that any backend might still consider running. This needs to
195  * include processes running VACUUM, in contrast to the normal visibility
196  * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
197  * determining visibility, but doesn't care about rows above its xmin
198  * being removed.
199  *
200  * This likely should only be needed to determine whether pg_subtrans can
201  * be truncated. It currently includes the effects of replication slots,
202  * for historical reasons. But that could likely be changed.
203  */
205 
206  /*
207  * Oldest xid for which deleted tuples need to be retained in shared
208  * tables.
209  *
210  * This includes the effects of replication slots. If that's not desired,
211  * look at shared_oldest_nonremovable_raw.
212  */
214 
215  /*
216  * Oldest xid that may be necessary to retain in shared tables. This is
217  * the same as shared_oldest_nonremovable, except that it is not affected by
218  * replication slot's catalog_xmin.
219  *
220  * This is mainly useful to be able to send the catalog_xmin to upstream
221  * streaming replication servers via hot_standby_feedback, so they can
222  * apply the limit only when accessing catalog tables.
223  */
225 
226  /*
227  * Oldest xid for which deleted tuples need to be retained in non-shared
228  * catalog tables.
229  */
231 
232  /*
233  * Oldest xid for which deleted tuples need to be retained in normal user
234  * defined tables.
235  */
238 
239 
241 
242 static PGPROC *allProcs;
243 
244 /*
245  * Bookkeeping for tracking emulated transactions in recovery
246  */
250 
251 /*
252  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
253  * the highest xid that might still be running that we don't have in
254  * KnownAssignedXids.
255  */
257 
258 /*
259  * State for visibility checks on different types of relations. See struct
260  * GlobalVisState for details. As shared, catalog, and user defined
261  * relations can have different horizons, one such state exists for each.
262  */
266 
267 /*
268  * This backend's RecentXmin at the last time the accurate xmin horizon was
269  * recomputed, or InvalidTransactionId if it has not. Used to limit how many
270  * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
271  */
273 
/*
 * XidCache instrumentation.
 *
 * When XIDCACHE_DEBUG is defined, one file-local counter exists per category
 * and each xc_..._inc() macro post-increments its counter.  When it is not
 * defined, the same macros expand to the no-op expression ((void) 0), so
 * call sites need no #ifdef of their own.
 */
274 #ifdef XIDCACHE_DEBUG
275
276 /* counters for XidCache measurement */
277 static long xc_by_recent_xmin = 0;
278 static long xc_by_known_xact = 0;
279 static long xc_by_my_xact = 0;
280 static long xc_by_latest_xid = 0;
281 static long xc_by_main_xid = 0;
282 static long xc_by_child_xid = 0;
283 static long xc_by_known_assigned = 0;
284 static long xc_no_overflow = 0;
285 static long xc_slow_answer = 0;
286
/* one increment macro per counter above */
287 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
288 #define xc_by_known_xact_inc() (xc_by_known_xact++)
289 #define xc_by_my_xact_inc() (xc_by_my_xact++)
290 #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
291 #define xc_by_main_xid_inc() (xc_by_main_xid++)
292 #define xc_by_child_xid_inc() (xc_by_child_xid++)
293 #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
294 #define xc_no_overflow_inc() (xc_no_overflow++)
295 #define xc_slow_answer_inc() (xc_slow_answer++)
296
/* prototype is only declared in XIDCACHE_DEBUG builds */
297 static void DisplayXidCache(void);
298 #else /* !XIDCACHE_DEBUG */
299
/* instrumentation disabled: every increment macro is a no-op expression */
300 #define xc_by_recent_xmin_inc() ((void) 0)
301 #define xc_by_known_xact_inc() ((void) 0)
302 #define xc_by_my_xact_inc() ((void) 0)
303 #define xc_by_latest_xid_inc() ((void) 0)
304 #define xc_by_main_xid_inc() ((void) 0)
305 #define xc_by_child_xid_inc() ((void) 0)
306 #define xc_by_known_assigned_inc() ((void) 0)
307 #define xc_no_overflow_inc() ((void) 0)
308 #define xc_slow_answer_inc() ((void) 0)
309 #endif /* XIDCACHE_DEBUG */
310 
311 /* Primitives for KnownAssignedXids array handling for standby */
312 static void KnownAssignedXidsCompress(bool force);
313 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
314  bool exclusive_lock);
315 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
316 static bool KnownAssignedXidExists(TransactionId xid);
317 static void KnownAssignedXidsRemove(TransactionId xid);
318 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
319  TransactionId *subxids);
321 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
323  TransactionId *xmin,
324  TransactionId xmax);
326 static void KnownAssignedXidsDisplay(int trace_level);
327 static void KnownAssignedXidsReset(void);
328 static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
329 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
330 static void MaintainLatestCompletedXid(TransactionId latestXid);
332 
334  TransactionId xid);
335 static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
336 
337 /*
338  * Report shared-memory space needed by CreateSharedProcArray.
339  */
340 Size
342 {
343  Size size;
344 
345  /* Size of the ProcArray structure itself */
346 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
347 
349  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
350 
351  /*
352  * During Hot Standby processing we have a data structure called
353  * KnownAssignedXids, created in shared memory. Local data structures are
354  * also created in various backends during GetSnapshotData(),
355  * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
356  * main structures created in those functions must be identically sized,
357  * since we may at times copy the whole of the data structures around. We
358  * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
359  *
360  * Ideally we'd only create this structure if we were actually doing hot
361  * standby in the current run, but we don't know that yet at the time
362  * shared memory is being set up.
363  */
364 #define TOTAL_MAX_CACHED_SUBXIDS \
365  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
366 
367  if (EnableHotStandby)
368  {
369  size = add_size(size,
370  mul_size(sizeof(TransactionId),
372  size = add_size(size,
373  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
374  }
375 
376  return size;
377 }
378 
379 /*
380  * Initialize the shared PGPROC array during postmaster startup.
381  */
382 void
384 {
385  bool found;
386 
387  /* Create or attach to the ProcArray shared structure */
388  procArray = (ProcArrayStruct *)
389  ShmemInitStruct("Proc Array",
391  mul_size(sizeof(int),
393  &found);
394 
395  if (!found)
396  {
397  /*
398  * We're the first - initialize.
399  */
400  procArray->numProcs = 0;
401  procArray->maxProcs = PROCARRAY_MAXPROCS;
403  procArray->numKnownAssignedXids = 0;
404  procArray->tailKnownAssignedXids = 0;
405  procArray->headKnownAssignedXids = 0;
411  }
412 
413  allProcs = ProcGlobal->allProcs;
414 
415  /* Create or attach to the KnownAssignedXids arrays too, if needed */
416  if (EnableHotStandby)
417  {
419  ShmemInitStruct("KnownAssignedXids",
420  mul_size(sizeof(TransactionId),
422  &found);
423  KnownAssignedXidsValid = (bool *)
424  ShmemInitStruct("KnownAssignedXidsValid",
425  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
426  &found);
427  }
428 }
429 
430 /*
431  * Add the specified PGPROC to the shared array.
432  */
433 void
435 {
436  ProcArrayStruct *arrayP = procArray;
437  int index;
438 
439  /* See ProcGlobal comment explaining why both locks are held */
440  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
441  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
442 
443  if (arrayP->numProcs >= arrayP->maxProcs)
444  {
445  /*
446  * Oops, no room. (This really shouldn't happen, since there is a
447  * fixed supply of PGPROC structs too, and so we should have failed
448  * earlier.)
449  */
450  ereport(FATAL,
451  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
452  errmsg("sorry, too many clients already")));
453  }
454 
455  /*
456  * Keep the procs array sorted by (PGPROC *) so that we can utilize
457  * locality of references much better. This is useful while traversing the
458  * ProcArray because there is an increased likelihood of finding the next
459  * PGPROC structure in the cache.
460  *
461  * Since the occurrence of adding/removing a proc is much lower than the
462  * access to the ProcArray itself, the overhead should be marginal
463  */
464  for (index = 0; index < arrayP->numProcs; index++)
465  {
466  /*
467  * If we are the first PGPROC or if we have found our right position
468  * in the array, break
469  */
470  if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
471  break;
472  }
473 
474  memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
475  (arrayP->numProcs - index) * sizeof(*arrayP->pgprocnos));
476  memmove(&ProcGlobal->xids[index + 1], &ProcGlobal->xids[index],
477  (arrayP->numProcs - index) * sizeof(*ProcGlobal->xids));
478  memmove(&ProcGlobal->subxidStates[index + 1], &ProcGlobal->subxidStates[index],
479  (arrayP->numProcs - index) * sizeof(*ProcGlobal->subxidStates));
480  memmove(&ProcGlobal->vacuumFlags[index + 1], &ProcGlobal->vacuumFlags[index],
481  (arrayP->numProcs - index) * sizeof(*ProcGlobal->vacuumFlags));
482 
483  arrayP->pgprocnos[index] = proc->pgprocno;
484  ProcGlobal->xids[index] = proc->xid;
485  ProcGlobal->subxidStates[index] = proc->subxidStatus;
486  ProcGlobal->vacuumFlags[index] = proc->vacuumFlags;
487 
488  arrayP->numProcs++;
489 
490  for (; index < arrayP->numProcs; index++)
491  {
492  allProcs[arrayP->pgprocnos[index]].pgxactoff = index;
493  }
494 
495  /*
496  * Release in reversed acquisition order, to reduce frequency of having to
497  * wait for XidGenLock while holding ProcArrayLock.
498  */
499  LWLockRelease(XidGenLock);
500  LWLockRelease(ProcArrayLock);
501 }
502 
503 /*
504  * Remove the specified PGPROC from the shared array.
505  *
506  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
507  * array, and thus causing it to appear as "not running" anymore. In this
508  * case we must advance latestCompletedXid. (This is essentially the same
509  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
510  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
511  * twophase.c depends on the latter.)
512  */
513 void
515 {
516  ProcArrayStruct *arrayP = procArray;
517  int index;
518 
519 #ifdef XIDCACHE_DEBUG
520  /* dump stats at backend shutdown, but not prepared-xact end */
521  if (proc->pid != 0)
522  DisplayXidCache();
523 #endif
524 
525  /* See ProcGlobal comment explaining why both locks are held */
526  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
527  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
528 
529  Assert(ProcGlobal->allProcs[arrayP->pgprocnos[proc->pgxactoff]].pgxactoff == proc->pgxactoff);
530 
531  if (TransactionIdIsValid(latestXid))
532  {
534 
535  /* Advance global latestCompletedXid while holding the lock */
536  MaintainLatestCompletedXid(latestXid);
537 
538  /* Same with xactCompletionCount */
540 
541  ProcGlobal->xids[proc->pgxactoff] = 0;
544  }
545  else
546  {
547  /* Shouldn't be trying to remove a live transaction here */
549  }
550 
554  ProcGlobal->vacuumFlags[proc->pgxactoff] = 0;
555 
556  for (index = 0; index < arrayP->numProcs; index++)
557  {
558  if (arrayP->pgprocnos[index] == proc->pgprocno)
559  {
560  /* Keep the PGPROC array sorted. See notes above */
561  memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
562  (arrayP->numProcs - index - 1) * sizeof(*arrayP->pgprocnos));
563  memmove(&ProcGlobal->xids[index], &ProcGlobal->xids[index + 1],
564  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->xids));
565  memmove(&ProcGlobal->subxidStates[index], &ProcGlobal->subxidStates[index + 1],
566  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->subxidStates));
567  memmove(&ProcGlobal->vacuumFlags[index], &ProcGlobal->vacuumFlags[index + 1],
568  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->vacuumFlags));
569 
570  arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
571  arrayP->numProcs--;
572 
573  /* adjust for removed PGPROC */
574  for (; index < arrayP->numProcs; index++)
575  allProcs[arrayP->pgprocnos[index]].pgxactoff--;
576 
577  /*
578  * Release in reversed acquisition order, to reduce frequency of
579  * having to wait for XidGenLock while holding ProcArrayLock.
580  */
581  LWLockRelease(XidGenLock);
582  LWLockRelease(ProcArrayLock);
583  return;
584  }
585  }
586 
587  /* Oops */
588  LWLockRelease(XidGenLock);
589  LWLockRelease(ProcArrayLock);
590 
591  elog(LOG, "failed to find proc %p in ProcArray", proc);
592 }
593 
594 
595 /*
596  * ProcArrayEndTransaction -- mark a transaction as no longer running
597  *
598  * This is used interchangeably for commit and abort cases. The transaction
599  * commit/abort must already be reported to WAL and pg_xact.
600  *
601  * proc is currently always MyProc, but we pass it explicitly for flexibility.
602  * latestXid is the latest Xid among the transaction's main XID and
603  * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
604  * the caller to pass latestXid, instead of computing it from the PGPROC's
605  * contents, because the subxid information in the PGPROC might be
606  * incomplete.)
607  */
608 void
610 {
611  if (TransactionIdIsValid(latestXid))
612  {
613  /*
614  * We must lock ProcArrayLock while clearing our advertised XID, so
615  * that we do not exit the set of "running" transactions while someone
616  * else is taking a snapshot. See discussion in
617  * src/backend/access/transam/README.
618  */
620 
621  /*
622  * If we can immediately acquire ProcArrayLock, we clear our own XID
623  * and release the lock. If not, use group XID clearing to improve
624  * efficiency.
625  */
626  if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
627  {
628  ProcArrayEndTransactionInternal(proc, latestXid);
629  LWLockRelease(ProcArrayLock);
630  }
631  else
632  ProcArrayGroupClearXid(proc, latestXid);
633  }
634  else
635  {
636  /*
637  * If we have no XID, we don't need to lock, since we won't affect
638  * anyone else's calculation of a snapshot. We might change their
639  * estimate of global xmin, but that's OK.
640  */
642  Assert(proc->subxidStatus.count == 0);
644 
646  proc->xmin = InvalidTransactionId;
647  proc->delayChkpt = false; /* be sure this is cleared in abort */
648  proc->recoveryConflictPending = false;
649 
650  /* must be cleared with xid/xmin: */
651  /* avoid unnecessarily dirtying shared cachelines */
653  {
654  Assert(!LWLockHeldByMe(ProcArrayLock));
655  LWLockAcquire(ProcArrayLock, LW_SHARED);
659  LWLockRelease(ProcArrayLock);
660  }
661  }
662 }
663 
664 /*
665  * Mark a write transaction as no longer running.
666  *
667  * We don't do any locking here; caller must handle that.
668  */
669 static inline void
671 {
672  size_t pgxactoff = proc->pgxactoff;
673 
674  Assert(LWLockHeldByMe(ProcArrayLock));
676  Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
677 
678  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
679  proc->xid = InvalidTransactionId;
681  proc->xmin = InvalidTransactionId;
682  proc->delayChkpt = false; /* be sure this is cleared in abort */
683  proc->recoveryConflictPending = false;
684 
685  /* must be cleared with xid/xmin: */
686  /* avoid unnecessarily dirtying shared cachelines */
688  {
691  }
692 
693  /* Clear the subtransaction-XID cache too while holding the lock */
694  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
696  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
697  {
698  ProcGlobal->subxidStates[pgxactoff].count = 0;
699  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
700  proc->subxidStatus.count = 0;
701  proc->subxidStatus.overflowed = false;
702  }
703 
704  /* Also advance global latestCompletedXid while holding the lock */
705  MaintainLatestCompletedXid(latestXid);
706 
707  /* Same with xactCompletionCount */
709 }
710 
711 /*
712  * ProcArrayGroupClearXid -- group XID clearing
713  *
714  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
715  * commit time, add ourselves to a list of processes that need their XIDs
716  * cleared. The first process to add itself to the list will acquire
717  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
718  * on behalf of all group members. This avoids a great deal of contention
719  * around ProcArrayLock when many processes are trying to commit at once,
720  * since the lock need not be repeatedly handed off from one committing
721  * process to the next.
722  */
723 static void
725 {
726  PROC_HDR *procglobal = ProcGlobal;
727  uint32 nextidx;
728  uint32 wakeidx;
729 
730  /* We should definitely have an XID to clear. */
732 
733  /* Add ourselves to the list of processes needing a group XID clear. */
734  proc->procArrayGroupMember = true;
735  proc->procArrayGroupMemberXid = latestXid;
736  nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
737  while (true)
738  {
739  pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
740 
742  &nextidx,
743  (uint32) proc->pgprocno))
744  break;
745  }
746 
747  /*
748  * If the list was not empty, the leader will clear our XID. It is
749  * impossible to have followers without a leader because the first process
750  * that has added itself to the list will always have nextidx as
751  * INVALID_PGPROCNO.
752  */
753  if (nextidx != INVALID_PGPROCNO)
754  {
755  int extraWaits = 0;
756 
757  /* Sleep until the leader clears our XID. */
759  for (;;)
760  {
761  /* acts as a read barrier */
762  PGSemaphoreLock(proc->sem);
763  if (!proc->procArrayGroupMember)
764  break;
765  extraWaits++;
766  }
768 
770 
771  /* Fix semaphore count for any absorbed wakeups */
772  while (extraWaits-- > 0)
773  PGSemaphoreUnlock(proc->sem);
774  return;
775  }
776 
777  /* We are the leader. Acquire the lock on behalf of everyone. */
778  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
779 
780  /*
781  * Now that we've got the lock, clear the list of processes waiting for
782  * group XID clearing, saving a pointer to the head of the list. Trying
783  * to pop elements one at a time could lead to an ABA problem.
784  */
785  nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
787 
788  /* Remember head of list so we can perform wakeups after dropping lock. */
789  wakeidx = nextidx;
790 
791  /* Walk the list and clear all XIDs. */
792  while (nextidx != INVALID_PGPROCNO)
793  {
794  PGPROC *proc = &allProcs[nextidx];
795 
797 
798  /* Move to next proc in list. */
799  nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
800  }
801 
802  /* We're done with the lock now. */
803  LWLockRelease(ProcArrayLock);
804 
805  /*
806  * Now that we've released the lock, go back and wake everybody up. We
807  * don't do this under the lock so as to keep lock hold times to a
808  * minimum. The system calls we need to perform to wake other processes
809  * up are probably much slower than the simple memory writes we did while
810  * holding the lock.
811  */
812  while (wakeidx != INVALID_PGPROCNO)
813  {
814  PGPROC *proc = &allProcs[wakeidx];
815 
816  wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
818 
819  /* ensure all previous writes are visible before follower continues. */
821 
822  proc->procArrayGroupMember = false;
823 
824  if (proc != MyProc)
825  PGSemaphoreUnlock(proc->sem);
826  }
827 }
828 
829 /*
830  * ProcArrayClearTransaction -- clear the transaction fields
831  *
832  * This is used after successfully preparing a 2-phase transaction. We are
833  * not actually reporting the transaction's XID as no longer running --- it
834  * will still appear as running because the 2PC's gxact is in the ProcArray
835  * too. We just have to clear out our own PGPROC.
836  */
837 void
839 {
840  size_t pgxactoff;
841 
842  /*
843  * Currently we need to lock ProcArrayLock exclusively here, as we
844  * increment xactCompletionCount below. We also need it at least in shared
845  * mode for pgproc->pgxactoff to stay the same below.
846  *
847  * We could however, as this action does not actually change anyone's view
848  * of the set of running XIDs (our entry is duplicate with the gxact that
849  * has already been inserted into the ProcArray), lower the lock level to
850  * shared if we were to make xactCompletionCount an atomic variable. But
851  * that doesn't seem worth it currently, as a 2PC commit is heavyweight
852  * enough for this not to be the bottleneck. If it ever becomes a
853  * bottleneck it may also be worth considering to combine this with the
854  * subsequent ProcArrayRemove()
855  */
856  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
857 
858  pgxactoff = proc->pgxactoff;
859 
860  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
861  proc->xid = InvalidTransactionId;
862 
864  proc->xmin = InvalidTransactionId;
865  proc->recoveryConflictPending = false;
866 
868  Assert(!proc->delayChkpt);
869 
870  /*
871  * Need to increment completion count even though transaction hasn't
872  * really committed yet. The reason for that is that GetSnapshotData()
873  * omits the xid of the current transaction, thus without the increment we
874  * otherwise could end up reusing the snapshot later. Which would be bad,
875  * because it might not count the prepared transaction as running.
876  */
878 
879  /* Clear the subtransaction-XID cache too */
880  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
882  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
883  {
884  ProcGlobal->subxidStates[pgxactoff].count = 0;
885  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
886  proc->subxidStatus.count = 0;
887  proc->subxidStatus.overflowed = false;
888  }
889 
890  LWLockRelease(ProcArrayLock);
891 }
892 
893 /*
894  * Update ShmemVariableCache->latestCompletedXid to point to latestXid if
895  * currently older.
896  */
897 static void
899 {
901 
902  Assert(FullTransactionIdIsValid(cur_latest));
904  Assert(LWLockHeldByMe(ProcArrayLock));
905 
906  if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
907  {
909  FullXidRelativeTo(cur_latest, latestXid);
910  }
911 
914 }
915 
916 /*
917  * Same as MaintainLatestCompletedXid, except for use during WAL replay.
918  */
919 static void
921 {
923  FullTransactionId rel;
924 
926  Assert(LWLockHeldByMe(ProcArrayLock));
927 
928  /*
929  * Need a FullTransactionId to compare latestXid with. Can't rely on
930  * latestCompletedXid to be initialized in recovery. But in recovery it's
931  * safe to access nextXid without a lock for the startup process.
932  */
935 
936  if (!FullTransactionIdIsValid(cur_latest) ||
937  TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
938  {
940  FullXidRelativeTo(rel, latestXid);
941  }
942 
944 }
945 
946 /*
947  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
948  *
949  * Remember up to where the startup process initialized the CLOG and subtrans
950  * so we can ensure it's initialized gaplessly up to the point where necessary
951  * while in recovery.
952  */
953 void
955 {
957  Assert(TransactionIdIsNormal(initializedUptoXID));
958 
959  /*
960  * we set latestObservedXid to the xid SUBTRANS has been initialized up
961  * to, so we can extend it from that point onwards in
962  * RecordKnownAssignedTransactionIds, and when we get consistent in
963  * ProcArrayApplyRecoveryInfo().
964  */
965  latestObservedXid = initializedUptoXID;
967 }
968 
969 /*
970  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
971  *
972  * Takes us through 3 states: Initialized, Pending and Ready.
973  * Normal case is to go all the way to Ready straight away, though there
974  * are atypical cases where we need to take it in steps.
975  *
976  * Use the data about running transactions on the primary to create the initial
977  * state of KnownAssignedXids. We also use these records to regularly prune
978  * KnownAssignedXids because we know it is possible that some transactions
979  * with FATAL errors fail to write abort records, which could cause eventual
980  * overflow.
981  *
982  * "running" describes the transactions running on the primary. This function
983  * reads its xids[], xcnt, subxcnt, subxid_overflow and oldestRunningXid
 * fields; the xids[] array itself is not modified, a sorted temporary copy is
 * used instead.
 *
 * See comments for LogStandbySnapshot().
983  */
984 void
986 {
987  TransactionId *xids;
988  int nxids;
989  int i;
990 
995 
996  /*
997  * Remove stale transactions, if any.
998  */
1000 
1001  /*
1002  * Remove stale locks, if any.
1003  */
1005 
1006  /*
1007  * If our snapshot is already valid, nothing else to do...
1008  */
1010  return;
1011 
1012  /*
1013  * If our initial RunningTransactionsData had an overflowed snapshot then
1014  * we knew we were missing some subxids from our snapshot. If we continue
1015  * to see overflowed snapshots then we might never be able to start up, so
1016  * we make another test to see if our snapshot is now valid. We know that
1017  * the missing subxids are equal to or earlier than nextXid. After we
1018  * initialise we continue to apply changes during recovery, so once the
1019  * oldestRunningXid is later than the nextXid from the initial snapshot we
1020  * know that we no longer have missing information and can mark the
1021  * snapshot as valid.
1022  */
1024  {
1025  /*
1026  * If the snapshot isn't overflowed or if it's empty we can reset our
1027  * pending state and use this snapshot instead.
1028  */
1029  if (!running->subxid_overflow || running->xcnt == 0)
1030  {
1031  /*
1032  * If we have already collected known assigned xids, we need to
1033  * throw them away before we apply the recovery snapshot.
1034  */
1037  }
1038  else
1039  {
1041  running->oldestRunningXid))
1042  {
1045  "recovery snapshots are now enabled");
1046  }
1047  else
1049  "recovery snapshot waiting for non-overflowed snapshot or "
1050  "until oldest active xid on standby is at least %u (now %u)",
1052  running->oldestRunningXid);
1053  return;
1054  }
1055  }
1056 
1058 
1059  /*
1060  * NB: this can be reached at least twice, so make sure new code can deal
1061  * with that.
1062  */
1063 
1064  /*
1065  * Nobody else is running yet, but take locks anyhow
1066  */
1067  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1068 
1069  /*
1070  * KnownAssignedXids is sorted so we cannot just add the xids, we have to
1071  * sort them first.
1072  *
1073  * Some of the new xids are top-level xids and some are subtransactions.
1074  * We don't call SubTransSetParent because it doesn't matter yet. If we
1075  * aren't overflowed then all xids will fit in snapshot and so we don't
1076  * need subtrans. If we later overflow, an xid assignment record will add
1077  * xids to subtrans. If RunningTransactionsData is overflowed then we
1078  * don't have enough information to correctly update subtrans anyway.
1079  */
1080 
1081  /*
1082  * Allocate a temporary array to avoid modifying the array passed as
1083  * argument. It is sized for the worst case (every listed xid is kept) and
1 * is freed again below.
1084  */
1085  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
1086 
1087  /*
1088  * Add to the temp array any xids which have not already completed.
1089  */
1090  nxids = 0;
1091  for (i = 0; i < running->xcnt + running->subxcnt; i++)
1092  {
1093  TransactionId xid = running->xids[i];
1094 
1095  /*
1096  * The running-xacts snapshot can contain xids that were still visible
1097  * in the procarray when the snapshot was taken, but were already
1098  * WAL-logged as completed. They're not running anymore, so ignore
1099  * them.
1100  */
1102  continue;
1103 
1104  xids[nxids++] = xid;
1105  }
1106 
1107  if (nxids > 0)
1108  {
 /*
  * KnownAssignedXids is expected to be empty here; if it is not, drop
  * ProcArrayLock and raise an error.
  */
1109  if (procArray->numKnownAssignedXids != 0)
1110  {
1111  LWLockRelease(ProcArrayLock);
1112  elog(ERROR, "KnownAssignedXids is not empty");
1113  }
1114 
1115  /*
1116  * Sort the array so that we can add them safely into
1117  * KnownAssignedXids.
1118  */
1119  qsort(xids, nxids, sizeof(TransactionId), xidComparator);
1120 
1121  /*
1122  * Add the sorted snapshot into KnownAssignedXids. The running-xacts
1123  * snapshot may include duplicated xids because of prepared
1124  * transactions, so ignore them. Because the array is sorted, duplicates
1 * are adjacent and comparing with the previous entry is sufficient.
1125  */
1126  for (i = 0; i < nxids; i++)
1127  {
1128  if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
1129  {
1130  elog(DEBUG1,
1131  "found duplicated transaction %u for KnownAssignedXids insertion",
1132  xids[i]);
1133  continue;
1134  }
1135  KnownAssignedXidsAdd(xids[i], xids[i], true);
1136  }
1137 
1139  }
1140 
1141  pfree(xids);
1142 
1143  /*
1144  * latestObservedXid is at least set to the point where SUBTRANS was
1145  * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
1146  * RecordKnownAssignedTransactionIds() was called for. Initialize
1147  * subtrans from thereon, up to nextXid - 1.
1148  *
1149  * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
1150  * because we've just added xids to the known assigned xids machinery that
1151  * haven't gone through RecordKnownAssignedTransactionId().
1152  */
1156  {
1159  }
1160  TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
1161 
1162  /* ----------
1163  * Now we've got the running xids we need to set the global values that
1164  * are used to track snapshots as they evolve further.
1165  *
1166  * - latestCompletedXid which will be the xmax for snapshots
1167  * - lastOverflowedXid which shows whether snapshots overflow
1168  * - nextXid
1169  *
1170  * If the snapshot overflowed, then we still initialise with what we know,
1171  * but the recovery snapshot isn't fully valid yet because we know there
1172  * are some subxids missing. We don't know the specific subxids that are
1173  * missing, so conservatively assume the last one is latestObservedXid.
1174  * ----------
1175  */
1176  if (running->subxid_overflow)
1177  {
1179 
1181  procArray->lastOverflowedXid = latestObservedXid;
1182  }
1183  else
1184  {
1186 
1188  }
1189 
1190  /*
1191  * If a transaction wrote a commit record in the gap between taking and
1192  * logging the snapshot then latestCompletedXid may already be higher than
1193  * the value from the snapshot, so check before we use the incoming value.
1194  * It also might not yet be set at all.
1195  */
1197 
1198  LWLockRelease(ProcArrayLock);
1199 
1200  /* ShmemVariableCache->nextXid must be beyond any observed xid. */
1202 
1204 
1207  elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
1208  else
1210  "recovery snapshot waiting for non-overflowed snapshot or "
1211  "until oldest active xid on standby is at least %u (now %u)",
1213  running->oldestRunningXid);
1214 }
1215 
1216 /*
1217  * ProcArrayApplyXidAssignment
1218  * Process an XLOG_XACT_ASSIGNMENT WAL record
 *
 * topxid is the top-level transaction id, and subxids[0 .. nsubxids-1] are
 * the subtransaction ids being assigned to it. Each subxid gets topxid
 * recorded as its parent via SubTransSetParent(), and lastOverflowedXid is
 * advanced to the latest of these xids if it currently precedes it.
1219  */
1220 void
1222  int nsubxids, TransactionId *subxids)
1223 {
1224  TransactionId max_xid;
1225  int i;
1226 
1228 
 /* latest xid among topxid and all of the subxids */
1229  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
1230 
1231  /*
1232  * Mark all the subtransactions as observed.
1233  *
1234  * NOTE: This will fail if the subxid contains too many previously
1235  * unobserved xids to fit into known-assigned-xids. That shouldn't happen
1236  * as the code stands, because xid-assignment records should never contain
1237  * more than PGPROC_MAX_CACHED_SUBXIDS entries.
1238  */
1240 
1241  /*
1242  * Notice that we update pg_subtrans with the top-level xid, rather than
1243  * the parent xid. This is a difference between normal processing and
1244  * recovery, yet is still correct in all cases. The reason is that
1245  * subtransaction commit is not marked in clog until commit processing, so
1246  * all aborted subtransactions have already been clearly marked in clog.
1247  * As a result we are able to refer directly to the top-level
1248  * transaction's state rather than skipping through all the intermediate
1249  * states in the subtransaction tree. This should be the first time we
1250  * have attempted to SubTransSetParent().
1251  */
1252  for (i = 0; i < nsubxids; i++)
1253  SubTransSetParent(subxids[i], topxid);
1254 
1255  /* KnownAssignedXids isn't maintained yet, so we're done for now */
1257  return;
1258 
1259  /*
1260  * Uses same locking as transaction commit
1261  */
1262  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1263 
1264  /*
1265  * Remove subxids from known-assigned-xacts.
1266  */
1268 
1269  /*
1270  * Advance lastOverflowedXid to be at least the last of these subxids.
1271  */
1272  if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
1273  procArray->lastOverflowedXid = max_xid;
1274 
1275  LWLockRelease(ProcArrayLock);
1276 }
1277 
1278 /*
1279  * TransactionIdIsInProgress -- is given transaction running in some backend
1280  *
1281  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1282  * there are four possibilities for finding a running transaction:
1283  *
1284  * 1. The given Xid is a main transaction Id. We will find this out cheaply
1285  * by looking at ProcGlobal->xids.
1286  *
1287  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1288  * We can find this out cheaply too.
1289  *
1290  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1291  * if the Xid is running on the primary.
1292  *
1293  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1294  * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
1295  * This is the slowest way, but sadly it has to be done always if the others
1296  * failed, unless we see that the cached subxact sets are complete (none have
1297  * overflowed).
1298  *
1299  * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
1300  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1301  * This buys back some concurrency (and we can't retrieve the main Xids from
1302  * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
 *
 * Returns true if xid is found (or must be assumed) to be running, false
 * otherwise. ProcArrayLock is acquired in shared mode internally and is
 * released again on every return path that follows the acquisition.
1303  */
1304 bool
1306 {
1307  static TransactionId *xids = NULL;
 /*
  * NOTE(review): other_xids is declared static, yet it is reassigned from
  * ProcGlobal->xids on every call below, so the static storage class looks
  * unnecessary -- confirm before changing.
  */
1308  static TransactionId *other_xids;
1309  XidCacheStatus *other_subxidstates;
1310  int nxids = 0;
1311  ProcArrayStruct *arrayP = procArray;
1312  TransactionId topxid;
1313  TransactionId latestCompletedXid;
1314  int mypgxactoff;
1315  size_t numProcs;
1316  int j;
1317 
1318  /*
1319  * Don't bother checking a transaction older than RecentXmin; it could not
1320  * possibly still be running. (Note: in particular, this guarantees that
1321  * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1322  * running.)
1323  */
1325  {
1327  return false;
1328  }
1329 
1330  /*
1331  * We may have just checked the status of this transaction, so if it is
1332  * already known to be completed, we can fall out without any access to
1333  * shared memory.
1334  */
1336  {
1338  return false;
1339  }
1340 
1341  /*
1342  * Also, we can handle our own transaction (and subtransactions) without
1343  * any access to shared memory.
1344  */
1346  {
1348  return true;
1349  }
1350 
1351  /*
1352  * If first time through, get workspace to remember main XIDs in. We
1353  * malloc it permanently to avoid repeated palloc/pfree overhead.
1354  */
1355  if (xids == NULL)
1356  {
1357  /*
1358  * In hot standby mode, reserve enough space to hold all xids in the
1359  * known-assigned list. If we later finish recovery, we no longer need
1360  * the bigger array, but we don't bother to shrink it.
1361  */
1362  int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1363 
1364  xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1365  if (xids == NULL)
1366  ereport(ERROR,
1367  (errcode(ERRCODE_OUT_OF_MEMORY),
1368  errmsg("out of memory")));
1369  }
1370 
1371  other_xids = ProcGlobal->xids;
1372  other_subxidstates = ProcGlobal->subxidStates;
1373 
1374  LWLockAcquire(ProcArrayLock, LW_SHARED);
1375 
1376  /*
1377  * Now that we have the lock, we can check latestCompletedXid; if the
1378  * target Xid is after that, it's surely still running.
1379  */
1380  latestCompletedXid =
1382  if (TransactionIdPrecedes(latestCompletedXid, xid))
1383  {
1384  LWLockRelease(ProcArrayLock);
1386  return true;
1387  }
1388 
1389  /* No shortcuts, gotta grovel through the array */
1390  mypgxactoff = MyProc->pgxactoff;
1391  numProcs = arrayP->numProcs;
1392  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
1393  {
1394  int pgprocno;
1395  PGPROC *proc;
1396  TransactionId pxid;
1397  int pxids;
1398 
1399  /* Ignore ourselves --- dealt with it above */
1400  if (pgxactoff == mypgxactoff)
1401  continue;
1402 
1403  /* Fetch xid just once - see GetNewTransactionId */
1404  pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
1405 
1406  if (!TransactionIdIsValid(pxid))
1407  continue;
1408 
1409  /*
1410  * Step 1: check the main Xid
1411  */
1412  if (TransactionIdEquals(pxid, xid))
1413  {
1414  LWLockRelease(ProcArrayLock);
1416  return true;
1417  }
1418 
1419  /*
1420  * We can ignore main Xids that are younger than the target Xid, since
1421  * the target could not possibly be their child.
1422  */
1423  if (TransactionIdPrecedes(xid, pxid))
1424  continue;
1425 
1426  /*
1427  * Step 2: check the cached child-Xids arrays
1428  */
1429  pxids = other_subxidstates[pgxactoff].count;
1430  pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
1431  pgprocno = arrayP->pgprocnos[pgxactoff];
1432  proc = &allProcs[pgprocno];
1433  for (j = pxids - 1; j >= 0; j--)
1434  {
1435  /* Fetch xid just once - see GetNewTransactionId */
1436  TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
1437 
1438  if (TransactionIdEquals(cxid, xid))
1439  {
1440  LWLockRelease(ProcArrayLock);
1442  return true;
1443  }
1444  }
1445 
1446  /*
1447  * Save the main Xid for step 4. We only need to remember main Xids
1448  * that have uncached children. (Note: there is no race condition
1449  * here because the overflowed flag cannot be cleared, only set, while
1450  * we hold ProcArrayLock. So we can't miss an Xid that we need to
1451  * worry about.)
1452  */
1453  if (other_subxidstates[pgxactoff].overflowed)
1454  xids[nxids++] = pxid;
1455  }
1456 
1457  /*
1458  * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1459  * in the list must be treated as running.
1460  */
1461  if (RecoveryInProgress())
1462  {
1463  /* none of the PGPROC entries should have XIDs in hot standby mode */
1464  Assert(nxids == 0);
1465 
1466  if (KnownAssignedXidExists(xid))
1467  {
1468  LWLockRelease(ProcArrayLock);
1470  return true;
1471  }
1472 
1473  /*
1474  * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1475  * too. Fetch all xids from KnownAssignedXids that are lower than
1476  * xid, since if xid is a subtransaction its parent will always have a
1477  * lower value. Note we will collect both main and subXIDs here, but
1478  * there's no help for it.
1479  */
1480  if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1481  nxids = KnownAssignedXidsGet(xids, xid);
1482  }
1483 
1484  LWLockRelease(ProcArrayLock);
1485 
1486  /*
1487  * If none of the relevant caches overflowed, we know the Xid is not
1488  * running without even looking at pg_subtrans.
1489  */
1490  if (nxids == 0)
1491  {
1493  return false;
1494  }
1495 
1496  /*
1497  * Step 4: have to check pg_subtrans.
1498  *
1499  * At this point, we know it's either a subtransaction of one of the Xids
1500  * in xids[], or it's not running. If it's an already-failed
1501  * subtransaction, we want to say "not running" even though its parent may
1502  * still be running. So first, check pg_xact to see if it's been aborted.
1503  */
1505 
1506  if (TransactionIdDidAbort(xid))
1507  return false;
1508 
1509  /*
1510  * It isn't aborted, so check whether the transaction tree it belongs to
1511  * is still running (or, more precisely, whether it was running when we
1512  * held ProcArrayLock).
 *
 * The saved xids[] are searched only when topxid differs from xid;
 * presumably a top-level xid would already have been matched in the
 * earlier steps.
1513  */
1514  topxid = SubTransGetTopmostTransaction(xid);
1515  Assert(TransactionIdIsValid(topxid));
1516  if (!TransactionIdEquals(topxid, xid))
1517  {
1518  for (int i = 0; i < nxids; i++)
1519  {
1520  if (TransactionIdEquals(xids[i], topxid))
1521  return true;
1522  }
1523  }
1524 
1525  return false;
1526 }
1527 
1528 /*
1529  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1530  *
1531  * This differs from TransactionIdIsInProgress in that it ignores prepared
1532  * transactions, as well as transactions running on the primary if we're in
1533  * hot standby. Also, we ignore subtransactions since that's not needed
1534  * for current uses.
 *
 * Returns true only if some proc array entry with a nonzero pid has a
 * ProcGlobal->xids[] value equal to xid. The scan runs with ProcArrayLock
 * held in shared mode.
1535  */
1536 bool
1538 {
1539  bool result = false;
1540  ProcArrayStruct *arrayP = procArray;
1541  TransactionId *other_xids = ProcGlobal->xids;
1542  int i;
1543 
1544  /*
1545  * Don't bother checking a transaction older than RecentXmin; it could not
1546  * possibly still be running.
1547  */
1549  return false;
1550 
1551  LWLockAcquire(ProcArrayLock, LW_SHARED);
1552 
1553  for (i = 0; i < arrayP->numProcs; i++)
1554  {
1555  int pgprocno = arrayP->pgprocnos[i];
1556  PGPROC *proc = &allProcs[pgprocno];
1557  TransactionId pxid;
1558 
1559  /* Fetch xid just once - see GetNewTransactionId */
1560  pxid = UINT32_ACCESS_ONCE(other_xids[i]);
1561 
1562  if (!TransactionIdIsValid(pxid))
1563  continue;
1564 
1565  if (proc->pid == 0)
1566  continue; /* ignore prepared transactions */
1567 
1568  if (TransactionIdEquals(pxid, xid))
1569  {
1570  result = true;
1571  break;
1572  }
1573  }
1574 
1575  LWLockRelease(ProcArrayLock);
1576 
1577  return result;
1578 }
1579 
1580 
1581 /*
1582  * Determine XID horizons.
1583  *
1584  * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
1585  * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
1586  * well as "internally" by GlobalVisUpdate() (see comment above struct
1587  * GlobalVisState).
1588  *
1589  * See the definition of ComputedXidHorizonsResult for the various computed
1590  * horizons. The results are written into *h.
1591  *
1592  * For VACUUM separate horizons (used to decide which deleted tuples must
1593  * be preserved), for shared and non-shared tables are computed. For shared
1594  * relations backends in all databases must be considered, but for non-shared
1595  * relations that's not required, since only backends in my own database could
1596  * ever see the tuples in them. Also, we can ignore concurrently running lazy
1597  * VACUUMs because (a) they must be working on other tables, and (b) they
1598  * don't need to do snapshot-based lookups.
1599  *
1600  * This also computes a horizon used to truncate pg_subtrans. For that
1601  * backends in all databases have to be considered, and concurrently running
1602  * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
1603  * accesses.
1604  *
1605  * Note: we include all currently running xids in the set of considered xids.
1606  * This ensures that if a just-started xact has not yet set its snapshot,
1607  * when it does set the snapshot it cannot set xmin less than what we compute.
1608  * See notes in src/backend/access/transam/README.
1609  *
1610  * Note: despite the above, it's possible for the calculated values to move
1611  * backwards on repeated calls. The calculated values are conservative, so
1612  * that anything older is definitely not considered as running by anyone
1613  * anymore, but the exact values calculated depend on a number of things. For
1614  * example, if there are no transactions running in the current database, the
1615  * horizon for normal tables will be latestCompletedXid. If a transaction
1616  * begins after that, its xmin will include in-progress transactions in other
1617  * databases that started earlier, so another call will return a lower value.
1618  * Nonetheless it is safe to vacuum a table in the current database with the
1619  * first result. There are also replication-related effects: a walsender
1620  * process can set its xmin based on transactions that are no longer running
1621  * on the primary but are still being replayed on the standby, thus possibly
1622  * making the values go backwards. In this case there is a possibility that
1623  * we lose data that the standby would like to have, but unless the standby
1624  * uses a replication slot to make its xmin persistent there is little we can
1625  * do about that --- data is only protected if the walsender runs continuously
1626  * while queries are executed on the standby. (The Hot Standby code deals
1627  * with such cases by failing standby queries that needed to access
1628  * already-removed data, so there's no integrity bug.) The computed values
1629  * are also adjusted with vacuum_defer_cleanup_age, so increasing that setting
1630  * on the fly is another easy way to make horizons move backwards, with no
1631  * consequences for data integrity.
1632  *
1633  * Note: the approximate horizons (see definition of GlobalVisState) are
1634  * updated by the computations done here. That's currently required for
1635  * correctness and a small optimization. Without doing so it's possible that
1636  * heap vacuum's call to heap_page_prune() uses a more conservative horizon
1637  * than later when deciding which tuples can be removed - which the code
1638  * doesn't expect (breaking HOT).
1639  */
1640 static void
1642 {
1643  ProcArrayStruct *arrayP = procArray;
 /* only assigned (and only meaningful) when in_recovery is true */
1644  TransactionId kaxmin;
1645  bool in_recovery = RecoveryInProgress();
1646  TransactionId *other_xids = ProcGlobal->xids;
1647 
1648  /* inferred after ProcArrayLock is released */
1650 
1651  LWLockAcquire(ProcArrayLock, LW_SHARED);
1652 
1654 
1655  /*
1656  * We initialize the MIN() calculation with latestCompletedXid + 1. This
1657  * is a lower bound for the XIDs that might appear in the ProcArray later,
1658  * and so protects us against overestimating the result due to future
1659  * additions.
1660  */
1661  {
1662  TransactionId initial;
1663 
1665  Assert(TransactionIdIsValid(initial));
1666  TransactionIdAdvance(initial);
1667 
1668  h->oldest_considered_running = initial;
1669  h->shared_oldest_nonremovable = initial;
1670  h->data_oldest_nonremovable = initial;
1671  }
1672 
1673  /*
1674  * Fetch slot horizons while ProcArrayLock is held - the
1675  * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
1676  * the lock.
1677  */
1678  h->slot_xmin = procArray->replication_slot_xmin;
1680 
1681  for (int index = 0; index < arrayP->numProcs; index++)
1682  {
1683  int pgprocno = arrayP->pgprocnos[index];
1684  PGPROC *proc = &allProcs[pgprocno];
1685  int8 vacuumFlags = ProcGlobal->vacuumFlags[index];
1686  TransactionId xid;
1687  TransactionId xmin;
1688 
1689  /* Fetch xid just once - see GetNewTransactionId */
1690  xid = UINT32_ACCESS_ONCE(other_xids[index]);
1691  xmin = UINT32_ACCESS_ONCE(proc->xmin);
1692 
1693  /*
1694  * Consider both the transaction's Xmin, and its Xid.
1695  *
1696  * We must check both because a transaction might have an Xmin but not
1697  * (yet) an Xid; conversely, if it has an Xid, that could determine
1698  * some not-yet-set Xmin.
1699  */
1700  xmin = TransactionIdOlder(xmin, xid);
1701 
1702  /* if neither is set, this proc doesn't influence the horizon */
1703  if (!TransactionIdIsValid(xmin))
1704  continue;
1705 
1706  /*
1707  * Don't ignore any procs when determining which transactions might be
1708  * considered running. While slots should ensure logical decoding
1709  * backends are protected even without this check, it can't hurt to
1710  * include them here as well.
1711  */
1714 
1715  /*
1716  * Skip over backends either vacuuming (which is ok with rows being
1717  * removed, as long as pg_subtrans is not truncated) or doing logical
1718  * decoding (which manages xmin separately, check below).
1719  */
1720  if (vacuumFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
1721  continue;
1722 
1723  /* shared tables need to take backends in all databases into account */
1726 
1727  /*
1728  * Normally queries in other databases are ignored for anything but
1729  * the shared horizon. But in recovery we cannot compute an accurate
1730  * per-database horizon as all xids are managed via the
1731  * KnownAssignedXids machinery.
1732  */
1733  if (in_recovery ||
1734  proc->databaseId == MyDatabaseId ||
1735  proc->databaseId == 0) /* always include WalSender */
1736  {
1739  }
1740  }
1741 
1742  /*
1743  * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
1744  * after lock is released.
1745  */
1746  if (in_recovery)
1747  kaxmin = KnownAssignedXidsGetOldestXmin();
1748 
1749  /*
1750  * No other information from shared state is needed, release the lock
1751  * immediately. The rest of the computations can be done without a lock.
1752  */
1753  LWLockRelease(ProcArrayLock);
1754 
1755  if (in_recovery)
1756  {
1763  }
1764  else
1765  {
1766  /*
1767  * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age.
1768  *
1769  * vacuum_defer_cleanup_age provides some additional "slop" for the
1770  * benefit of hot standby queries on standby servers. This is quick
1771  * and dirty, and perhaps not all that useful unless the primary has a
1772  * predictable transaction rate, but it offers some protection when
1773  * there's no walsender connection. Note that we are assuming
1774  * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1775  * so guc.c should limit it to no more than the xidStopLimit threshold
1776  * in varsup.c. Also note that we intentionally don't apply
1777  * vacuum_defer_cleanup_age on standby servers.
1778  */
1788  }
1789 
1790  /*
1791  * Check whether there are replication slots requiring an older xmin.
1792  */
1797 
1798  /*
1799  * The only difference between catalog / data horizons is that the slot's
1800  * catalog xmin is applied to the catalog one (so catalogs can be accessed
1801  * for logical decoding). Initialize with data horizon, and then back up
1802  * further if necessary. Have to back up the shared horizon as well, since
1803  * that also can contain catalogs.
1804  */
1808  h->slot_catalog_xmin);
1812  h->slot_catalog_xmin);
1813 
1814  /*
1815  * It's possible that slots / vacuum_defer_cleanup_age backed up the
1816  * horizons further than oldest_considered_running. Fix.
1817  */
1827 
1828  /*
1829  * shared horizons have to be at least as old as the oldest visible in
1830  * current db
1831  */
1836 
1837  /*
1838  * Horizons need to ensure that pg_subtrans access is still possible for
1839  * the relevant backends.
1840  */
1849  h->slot_xmin));
1852  h->slot_catalog_xmin));
1853 
1854  /* update approximate horizons with the computed horizons */
1856 }
1857 
1858 /*
1859  * Return the oldest XID for which deleted tuples must be preserved in the
1860  * passed table.
1861  *
1862  * If rel is not NULL the horizon may be considerably more recent than in the
1863  * NULL case (where fewer tuples will be removable). In the NULL case a horizon
1864  * that is correct (but not optimal) for all relations will be returned.
1865  *
1866  * This is used by VACUUM to decide which deleted tuples must be preserved in
1867  * the passed in table.
 *
 * The horizons are recomputed on every call via ComputeXidHorizons(); the
 * shared horizon is returned when rel is NULL or refers to a shared relation.
1868  */
1871 {
1872  ComputeXidHorizonsResult horizons;
1873 
1874  ComputeXidHorizons(&horizons);
1875 
1876  /* select horizon appropriate for relation */
1877  if (rel == NULL || rel->rd_rel->relisshared)
1878  return horizons.shared_oldest_nonremovable;
1880  return horizons.catalog_oldest_nonremovable;
1881  else
1882  return horizons.data_oldest_nonremovable;
1883 }
1884 
1885 /*
1886  * Return the oldest transaction id any currently running backend might still
1887  * consider running. This should not be used for visibility / pruning
1888  * determinations (see GetOldestNonRemovableTransactionId()), but for
1889  * decisions like up to where pg_subtrans can be truncated.
 *
 * The value is the oldest_considered_running field of a freshly computed
 * ComputeXidHorizons() result.
1890  */
1893 {
1894  ComputeXidHorizonsResult horizons;
1895 
1896  ComputeXidHorizons(&horizons);
1897 
1898  return horizons.oldest_considered_running;
1899 }
1900 
1901 /*
1902  * Return the visibility horizons for a hot standby feedback message.
 *
 * Both results are returned through output parameters: *xmin receives the
 * raw shared horizon and *catalog_xmin receives the replication slots'
 * catalog xmin, both taken from a freshly computed ComputeXidHorizons()
 * result.
1903  */
1904 void
1906 {
1907  ComputeXidHorizonsResult horizons;
1908 
1909  ComputeXidHorizons(&horizons);
1910 
1911  /*
1912  * Don't want to use shared_oldest_nonremovable here, as that contains the
1913  * effect of replication slot's catalog_xmin. We want to send a separate
1914  * feedback for the catalog horizon, so the primary can remove data table
1915  * contents more aggressively.
1916  */
1917  *xmin = horizons.shared_oldest_nonremovable_raw;
1918  *catalog_xmin = horizons.slot_catalog_xmin;
1919 }
1920 
1921 /*
1922  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1923  *
1924  * We have to export this for use by snapmgr.c.
 *
 * Returns procArray->maxProcs.
1925  */
1926 int
1928 {
1929  return procArray->maxProcs;
1930 }
1931 
1932 /*
1933  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1934  *
1935  * We have to export this for use by snapmgr.c.
 *
 * Returns the constant TOTAL_MAX_CACHED_SUBXIDS.
1936  */
1937 int
1939 {
1940  return TOTAL_MAX_CACHED_SUBXIDS;
1941 }
1942 
1943 /*
1944  * Initialize old_snapshot_threshold specific parts of a newly built snapshot.
 *
 * Sets snapshot->lsn and snapshot->whenTaken, either to dummy values or to
 * the current WAL insert position and snapshot timestamp; in the latter case
 * the old-snapshot time mapping is also updated using snapshot->xmin.
1945  */
1946 static void
1948 {
1950  {
1951  /*
1952  * If not using "snapshot too old" feature, fill related fields with
1953  * dummy values that don't require any locking.
1954  */
1955  snapshot->lsn = InvalidXLogRecPtr;
1956  snapshot->whenTaken = 0;
1957  }
1958  else
1959  {
1960  /*
1961  * Capture the current time and WAL stream location in case this
1962  * snapshot becomes old enough to need to fall back on the special
1963  * "old snapshot" logic.
1964  */
1965  snapshot->lsn = GetXLogInsertRecPtr();
1966  snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1967  MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
1968  }
1969 }
1970 
1971 /*
1972  * Helper function for GetSnapshotData() that checks if the bulk of the
1973  * visibility information in the snapshot is still valid. If so, it updates
1974  * the fields that need to change and returns true. Otherwise it returns
1975  * false.
1976  *
1977  * This very likely can be evolved to not need ProcArrayLock held (at very
1978  * least in the case we already hold a snapshot), but that's for another day.
 *
 * The caller must hold ProcArrayLock (asserted below). False is returned
 * when snapshot->snapXactCompletionCount is 0 or differs from the current
 * ShmemVariableCache->xactCompletionCount.
1979  */
1980 static bool
1982 {
1983  uint64 curXactCompletionCount;
1984 
1985  Assert(LWLockHeldByMe(ProcArrayLock));
1986 
1987  if (unlikely(snapshot->snapXactCompletionCount == 0))
1988  return false;
1989 
1990  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
1991  if (curXactCompletionCount != snapshot->snapXactCompletionCount)
1992  return false;
1993 
1994  /*
1995  * If the current xactCompletionCount is still the same as it was at the
1996  * time the snapshot was built, we can be sure that rebuilding the
1997  * contents of the snapshot the hard way would result in the same snapshot
1998  * contents:
1999  *
2000  * As explained in transam/README, the set of xids considered running by
2001  * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
2002  * contents only depend on transactions with xids and xactCompletionCount
2003  * is incremented whenever a transaction with an xid finishes (while
2004  * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
2005  * ensures we would detect if the snapshot would have changed.
2006  *
2007  * As the snapshot contents are the same as they were before, it is safe
2008  * to re-enter the snapshot's xmin into the PGPROC array. None of the rows
2009  * visible under the snapshot could already have been removed (that'd
2010  * require the set of running transactions to change) and it fulfills the
2011  * requirement that concurrent GetSnapshotData() calls yield the same
2012  * xmin.
2013  */
2015  MyProc->xmin = TransactionXmin = snapshot->xmin;
2016 
2017  RecentXmin = snapshot->xmin;
2019 
 /* reset the per-use bookkeeping fields of the reused snapshot */
2020  snapshot->curcid = GetCurrentCommandId(false);
2021  snapshot->active_count = 0;
2022  snapshot->regd_count = 0;
2023  snapshot->copied = false;
2024 
2026 
2027  return true;
2028 }
2029 
2030 /*
2031  * GetSnapshotData -- returns information about running transactions.
2032  *
2033  * The returned snapshot includes xmin (lowest still-running xact ID),
2034  * xmax (highest completed xact ID + 1), and a list of running xact IDs
2035  * in the range xmin <= xid < xmax. It is used as follows:
2036  * All xact IDs < xmin are considered finished.
2037  * All xact IDs >= xmax are considered still running.
2038  * For an xact ID xmin <= xid < xmax, consult list to see whether
2039  * it is considered running or not.
2040  * This ensures that the set of transactions seen as "running" by the
2041  * current xact will not change after it takes the snapshot.
2042  *
2043  * All running top-level XIDs are included in the snapshot, except for lazy
2044  * VACUUM processes. We also try to include running subtransaction XIDs,
2045  * but since PGPROC has only a limited cache area for subxact XIDs, full
2046  * information may not be available. If we find any overflowed subxid arrays,
2047  * we have to mark the snapshot's subxid data as overflowed, and extra work
2048  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
2049  * in heapam_visibility.c).
2050  *
2051  * We also update the following backend-global variables:
2052  * TransactionXmin: the oldest xmin of any snapshot in use in the
2053  * current transaction (this is the same as MyProc->xmin).
2054  * RecentXmin: the xmin computed for the most recent snapshot. XIDs
2055  * older than this are known not running any more.
2056  *
2057  * And try to advance the bounds of GlobalVisSharedRels, GlobalVisCatalogRels,
2058  * GlobalVisDataRels for the benefit of the GlobalVisTest* family of functions.
2059  *
2060  * Note: this function should probably not be called with an argument that's
2061  * not statically allocated (see xip allocation below).
 *
 * NOTE(review): the original line numbers in this listing have gaps (for
 * example between "Snapshot" and the opening brace, and after each
 * "(TransactionId *)" cast below), so some source lines appear to be
 * missing here -- confirm against the upstream file before editing code.
2062  */
2063 Snapshot
2065 {
2066  ProcArrayStruct *arrayP = procArray;
2067  TransactionId *other_xids = ProcGlobal->xids;
2068  TransactionId xmin;
2069  TransactionId xmax;
2070  size_t count = 0;
2071  int subcount = 0;
2072  bool suboverflowed = false;
2073  FullTransactionId latest_completed;
2074  TransactionId oldestxid;
2075  int mypgxactoff;
2076  TransactionId myxid;
2077  uint64 curXactCompletionCount;
2078 
2081 
2082  Assert(snapshot != NULL);
2083 
2084  /*
2085  * Allocating space for maxProcs xids is usually overkill; numProcs would
2086  * be sufficient. But it seems better to do the malloc while not holding
2087  * the lock, so we can't look at numProcs. Likewise, we allocate much
2088  * more subxip storage than is probably needed.
2089  *
2090  * This does open a possibility for avoiding repeated malloc/free: since
2091  * maxProcs does not change at runtime, we can simply reuse the previous
2092  * xip arrays if any. (This relies on the fact that all callers pass
2093  * static SnapshotData structs.)
2094  */
2095  if (snapshot->xip == NULL)
2096  {
2097  /*
2098  * First call for this snapshot. Snapshot is same size whether or not
2099  * we are in recovery, see later comments.
2100  */
2101  snapshot->xip = (TransactionId *)
2103  if (snapshot->xip == NULL)
2104  ereport(ERROR,
2105  (errcode(ERRCODE_OUT_OF_MEMORY),
2106  errmsg("out of memory")));
2107  Assert(snapshot->subxip == NULL);
2108  snapshot->subxip = (TransactionId *)
2110  if (snapshot->subxip == NULL)
2111  ereport(ERROR,
2112  (errcode(ERRCODE_OUT_OF_MEMORY),
2113  errmsg("out of memory")));
2114  }
2115 
2116  /*
2117  * It is sufficient to get shared lock on ProcArrayLock, even if we are
2118  * going to set MyProc->xmin.
2119  */
2120  LWLockAcquire(ProcArrayLock, LW_SHARED);
2121 
 /* Fast path: the previous snapshot contents are still valid, reuse them. */
2122  if (GetSnapshotDataReuse(snapshot))
2123  {
2124  LWLockRelease(ProcArrayLock);
2125  return snapshot;
2126  }
2127 
2128  latest_completed = ShmemVariableCache->latestCompletedXid;
2129  mypgxactoff = MyProc->pgxactoff;
2130  myxid = other_xids[mypgxactoff];
2131  Assert(myxid == MyProc->xid);
2132 
2133  oldestxid = ShmemVariableCache->oldestXid;
2134  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2135 
2136  /* xmax is always latestCompletedXid + 1 */
2137  xmax = XidFromFullTransactionId(latest_completed);
2138  TransactionIdAdvance(xmax);
2140 
2141  /* initialize xmin calculation with xmax */
2142  xmin = xmax;
2143 
2144  /* take own xid into account, saves a check inside the loop */
2145  if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
2146  xmin = myxid;
2147 
2149 
2150  if (!snapshot->takenDuringRecovery)
2151  {
2152  size_t numProcs = arrayP->numProcs;
2153  TransactionId *xip = snapshot->xip;
2154  int *pgprocnos = arrayP->pgprocnos;
2155  XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
2156  uint8 *allVacuumFlags = ProcGlobal->vacuumFlags;
2157 
2158  /*
2159  * First collect set of pgxactoff/xids that need to be included in the
2160  * snapshot.
2161  */
2162  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
2163  {
2164  /* Fetch xid just once - see GetNewTransactionId */
2165  TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
2166  uint8 vacuumFlags;
2167 
2168  Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
2169 
2170  /*
2171  * If the transaction has no XID assigned, we can skip it; it
2172  * won't have sub-XIDs either.
2173  */
2174  if (likely(xid == InvalidTransactionId))
2175  continue;
2176 
2177  /*
2178  * We don't include our own XIDs (if any) in the snapshot. It
2179  * needs to be included in the xmin computation, but we did so
2180  * outside the loop.
2181  */
2182  if (pgxactoff == mypgxactoff)
2183  continue;
2184 
2185  /*
2186  * The only way we are able to get here with a non-normal xid
2187  * is during bootstrap - with this backend using
2188  * BootstrapTransactionId. But the above test should filter
2189  * that out.
2190  */
2192 
2193  /*
2194  * If the XID is >= xmax, we can skip it; such transactions will
2195  * be treated as running anyway (and any sub-XIDs will also be >=
2196  * xmax).
2197  */
2198  if (!NormalTransactionIdPrecedes(xid, xmax))
2199  continue;
2200 
2201  /*
2202  * Skip over backends doing logical decoding which manages xmin
2203  * separately (check below) and ones running LAZY VACUUM.
2204  */
2205  vacuumFlags = allVacuumFlags[pgxactoff];
2206  if (vacuumFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
2207  continue;
2208 
2209  if (NormalTransactionIdPrecedes(xid, xmin))
2210  xmin = xid;
2211 
2212  /* Add XID to snapshot. */
2213  xip[count++] = xid;
2214 
2215  /*
2216  * Save subtransaction XIDs if possible (if we've already
2217  * overflowed, there's no point). Note that the subxact XIDs must
2218  * be later than their parent, so no need to check them against
2219  * xmin. We could filter against xmax, but it seems better not to
2220  * do that much work while holding the ProcArrayLock.
2221  *
2222  * The other backend can add more subxids concurrently, but cannot
2223  * remove any. Hence it's important to fetch nxids just once.
2224  * Should be safe to use memcpy, though. (We needn't worry about
2225  * missing any xids added concurrently, because they must postdate
2226  * xmax.)
2227  *
2228  * Again, our own XIDs are not included in the snapshot.
2229  */
2230  if (!suboverflowed)
2231  {
2232 
2233  if (subxidStates[pgxactoff].overflowed)
2234  suboverflowed = true;
2235  else
2236  {
2237  int nsubxids = subxidStates[pgxactoff].count;
2238 
2239  if (nsubxids > 0)
2240  {
2241  int pgprocno = pgprocnos[pgxactoff];
2242  PGPROC *proc = &allProcs[pgprocno];
2243 
2244  pg_read_barrier(); /* pairs with GetNewTransactionId */
2245 
2246  memcpy(snapshot->subxip + subcount,
2247  (void *) proc->subxids.xids,
2248  nsubxids * sizeof(TransactionId));
2249  subcount += nsubxids;
2250  }
2251  }
2252  }
2253  }
2254  }
2255  else
2256  {
2257  /*
2258  * We're in hot standby, so get XIDs from KnownAssignedXids.
2259  *
2260  * We store all xids directly into subxip[]. Here's why:
2261  *
2262  * In recovery we don't know which xids are top-level and which are
2263  * subxacts, a design choice that greatly simplifies xid processing.
2264  *
2265  * It seems like we would want to try to put xids into xip[] only, but
2266  * that is fairly small. We would either need to make that bigger or
2267  * to increase the rate at which we WAL-log xid assignment; neither is
2268  * an appealing choice.
2269  *
2270  * We could try to store xids into xip[] first and then into subxip[]
2271  * if there are too many xids. That only works if the snapshot doesn't
2272  * overflow because we do not search subxip[] in that case. A simpler
2273  * way is to just store all xids in the subxact array because this is
2274  * by far the bigger array. We just leave the xip array empty.
2275  *
2276  * Either way we need to change the way XidInMVCCSnapshot() works
2277  * depending upon when the snapshot was taken, or change normal
2278  * snapshot processing so it matches.
2279  *
2280  * Note: It is possible for recovery to end before we finish taking
2281  * the snapshot, and for newly assigned transaction ids to be added to
2282  * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
2283  * those newly added transaction ids would be filtered away, so we
2284  * need not be concerned about them.
2285  */
2286  subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
2287  xmax);
2288 
2289  if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
2290  suboverflowed = true;
2291  }
2292 
2293 
2294  /*
2295  * Fetch into local variable while ProcArrayLock is held - the
2296  * LWLockRelease below is a barrier, ensuring this happens inside the
2297  * lock.
2298  */
2299  replication_slot_xmin = procArray->replication_slot_xmin;
2300  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
2301 
2303  MyProc->xmin = TransactionXmin = xmin;
2304 
2305  LWLockRelease(ProcArrayLock);
2306 
2307  /* maintain state for GlobalVis* */
2308  {
2309  TransactionId def_vis_xid;
2310  TransactionId def_vis_xid_data;
2311  FullTransactionId def_vis_fxid;
2312  FullTransactionId def_vis_fxid_data;
2313  FullTransactionId oldestfxid;
2314 
2315  /*
2316  * Converting oldestXid is only safe when xid horizon cannot advance,
2317  * i.e. holding locks. While we don't hold the lock anymore, all the
2318  * necessary data has been gathered with lock held.
2319  */
2320  oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
2321 
2322  /* apply vacuum_defer_cleanup_age */
2323  def_vis_xid_data =
2325 
2326  /* Check whether there's a replication slot requiring an older xmin. */
2327  def_vis_xid_data =
2328  TransactionIdOlder(def_vis_xid_data, replication_slot_xmin);
2329 
2330  /*
2331  * Rows in non-shared, non-catalog tables possibly could be vacuumed
2332  * if older than this xid.
2333  */
2334  def_vis_xid = def_vis_xid_data;
2335 
2336  /*
2337  * Check whether there's a replication slot requiring an older catalog
2338  * xmin.
2339  */
2340  def_vis_xid =
2341  TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
2342 
2343  def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
2344  def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
2345 
2346  /*
2347  * Check if we can increase upper bound. As a previous
2348  * GlobalVisUpdate() might have computed more aggressive values, don't
2349  * overwrite them if so.
2350  */
2351  GlobalVisSharedRels.definitely_needed =
2352  FullTransactionIdNewer(def_vis_fxid,
2353  GlobalVisSharedRels.definitely_needed);
2354  GlobalVisCatalogRels.definitely_needed =
2355  FullTransactionIdNewer(def_vis_fxid,
2356  GlobalVisCatalogRels.definitely_needed);
2357  GlobalVisDataRels.definitely_needed =
2358  FullTransactionIdNewer(def_vis_fxid_data,
2359  GlobalVisDataRels.definitely_needed);
2360 
2361  /*
2362  * Check if we know that we can initialize or increase the lower
2363  * bound. Currently the only cheap way to do so is to use
2364  * ShmemVariableCache->oldestXid as input.
2365  *
2366  * We should definitely be able to do better. We could e.g. put a
2367  * global lower bound value into ShmemVariableCache.
2368  */
2369  GlobalVisSharedRels.maybe_needed =
2370  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
2371  oldestfxid);
2372  GlobalVisCatalogRels.maybe_needed =
2373  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
2374  oldestfxid);
2375  GlobalVisDataRels.maybe_needed =
2376  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
2377  oldestfxid);
2378  }
2379 
2380  RecentXmin = xmin;
2382 
 /* Publish the values computed above into the caller's snapshot. */
2383  snapshot->xmin = xmin;
2384  snapshot->xmax = xmax;
2385  snapshot->xcnt = count;
2386  snapshot->subxcnt = subcount;
2387  snapshot->suboverflowed = suboverflowed;
2388  snapshot->snapXactCompletionCount = curXactCompletionCount;
2389 
2390  snapshot->curcid = GetCurrentCommandId(false);
2391 
2392  /*
2393  * This is a new snapshot, so set both refcounts to zero, and mark it as
2394  * not copied in persistent memory.
2395  */
2396  snapshot->active_count = 0;
2397  snapshot->regd_count = 0;
2398  snapshot->copied = false;
2399 
2401 
2402  return snapshot;
2403 }
2404 
2405 /*
2406  * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
2407  *
2408  * This is called when installing a snapshot imported from another
2409  * transaction. To ensure that OldestXmin doesn't go backwards, we must
2410  * check that the source transaction is still running, and we'd better do
2411  * that atomically with installing the new xmin.
2412  *
 * xmin is the value to install; sourcevxid identifies the source transaction
 * by backend ID and local transaction ID. A NULL sourcevxid yields false.
 * ProcArrayLock is held in shared mode while the array is searched and the
 * xmin is installed.
 *
2413  * Returns true if successful, false if source xact is no longer running.
2414  */
2415 bool
2417  VirtualTransactionId *sourcevxid)
2418 {
2419  bool result = false;
2420  ProcArrayStruct *arrayP = procArray;
2421  int index;
2422 
 /* No source transaction given: nothing can be matched. */
2424  if (!sourcevxid)
2425  return false;
2426 
2427  /* Get lock so source xact can't end while we're doing this */
2428  LWLockAcquire(ProcArrayLock, LW_SHARED);
2429 
2430  for (index = 0; index < arrayP->numProcs; index++)
2431  {
2432  int pgprocno = arrayP->pgprocnos[index];
2433  PGPROC *proc = &allProcs[pgprocno];
2434  int vacuumFlags = ProcGlobal->vacuumFlags[index];
2435  TransactionId xid;
2436 
2437  /* Ignore procs running LAZY VACUUM */
2438  if (vacuumFlags & PROC_IN_VACUUM)
2439  continue;
2440 
2441  /* We are only interested in the specific virtual transaction. */
2442  if (proc->backendId != sourcevxid->backendId)
2443  continue;
2444  if (proc->lxid != sourcevxid->localTransactionId)
2445  continue;
2446 
2447  /*
2448  * We check the transaction's database ID for paranoia's sake: if it's
2449  * in another DB then its xmin does not cover us. Caller should have
2450  * detected this already, so we just treat any funny cases as
2451  * "transaction not found".
2452  */
2453  if (proc->databaseId != MyDatabaseId)
2454  continue;
2455 
2456  /*
2457  * Likewise, let's just make real sure its xmin does cover us.
2458  */
2459  xid = UINT32_ACCESS_ONCE(proc->xmin);
2460  if (!TransactionIdIsNormal(xid) ||
2461  !TransactionIdPrecedesOrEquals(xid, xmin))
2462  continue;
2463 
2464  /*
2465  * We're good. Install the new xmin. As in GetSnapshotData, set
2466  * TransactionXmin too. (Note that because snapmgr.c called
2467  * GetSnapshotData first, we'll be overwriting a valid xmin here, so
2468  * we don't check that.)
2469  */
2470  MyProc->xmin = TransactionXmin = xmin;
2471 
2472  result = true;
2473  break;
2474  }
2475 
2476  LWLockRelease(ProcArrayLock);
2477 
2478  return result;
2479 }
2480 
2481 /*
2482  * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
2483  *
2484  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
2485  * PGPROC of the transaction from which we imported the snapshot, rather than
2486  * a virtual transaction ID.
2487  *
 * The xmin is installed only if proc is in our database and advertises a
 * normal xmin that precedes or equals the one being installed.
 *
2488  * Returns true if successful, false if source xact is no longer running.
2489  */
2490 bool
2492 {
2493  bool result = false;
2494  TransactionId xid;
2495 
2497  Assert(proc != NULL);
2498 
2499  /* Get lock so source xact can't end while we're doing this */
2500  LWLockAcquire(ProcArrayLock, LW_SHARED);
2501 
2502  /*
2503  * Be certain that the referenced PGPROC has an advertised xmin which is
2504  * no later than the one we're installing, so that the system-wide xmin
2505  * can't go backwards. Also, make sure it's running in the same database,
2506  * so that the per-database xmin cannot go backwards.
2507  */
2508  xid = UINT32_ACCESS_ONCE(proc->xmin);
2509  if (proc->databaseId == MyDatabaseId &&
2510  TransactionIdIsNormal(xid) &&
2511  TransactionIdPrecedesOrEquals(xid, xmin))
2512  {
 /* As in ProcArrayInstallImportedXmin, set TransactionXmin too. */
2513  MyProc->xmin = TransactionXmin = xmin;
2514  result = true;
2515  }
2516 
2517  LWLockRelease(ProcArrayLock);
2518 
2519  return result;
2520 }
2521 
2522 /*
2523  * GetRunningTransactionData -- returns information about running transactions.
2524  *
2525  * Similar to GetSnapshotData but returns more information. We include
2526  * all PGPROCs with an assigned TransactionId, even VACUUM processes and
2527  * prepared transactions.
2528  *
2529  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
2530  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
2531  * array until the caller has WAL-logged this snapshot, and releases the
2532  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
2533  * lock is released.
2534  *
2535  * The returned data structure is statically allocated; caller should not
2536  * modify it, and must not assume it is valid past the next call.
2537  *
2538  * This is never executed during recovery so there is no need to look at
2539  * KnownAssignedXids.
2540  *
2541  * Dummy PGPROCs from prepared transaction are included, meaning that this
2542  * may return entries with duplicated TransactionId values coming from
2543  * transaction finishing to prepare. Nothing is done about duplicated
2544  * entries here to not hold on ProcArrayLock more than necessary.
2545  *
2546  * We don't worry about updating other counters, we want to keep this as
2547  * simple as possible and leave GetSnapshotData() as the primary code for
2548  * that bookkeeping.
2549  *
2550  * Note that if any transaction has overflowed its cached subtransactions
2551  * then there is no real need to include any subtransactions.
 *
 * NOTE(review): the original line numbers in this listing have gaps (the
 * return type and function name lines, the allocation after the
 * "(TransactionId *)" cast, the right-hand sides of latestCompletedXid and
 * oldestRunningXid, and the assignment of nextXid), so some source lines
 * appear to be missing here -- confirm against the upstream file.
2552  */
2555 {
2556  /* result workspace */
2557  static RunningTransactionsData CurrentRunningXactsData;
2558 
2559  ProcArrayStruct *arrayP = procArray;
2560  TransactionId *other_xids = ProcGlobal->xids;
2561  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
2562  TransactionId latestCompletedXid;
2563  TransactionId oldestRunningXid;
2564  TransactionId *xids;
2565  int index;
2566  int count;
2567  int subcount;
2568  bool suboverflowed;
2569 
2571 
2572  /*
2573  * Allocating space for maxProcs xids is usually overkill; numProcs would
2574  * be sufficient. But it seems better to do the malloc while not holding
2575  * the lock, so we can't look at numProcs. Likewise, we allocate much
2576  * more subxip storage than is probably needed.
2577  *
2578  * Should only be allocated in bgwriter, since only ever executed during
2579  * checkpoints.
2580  */
2581  if (CurrentRunningXacts->xids == NULL)
2582  {
2583  /*
2584  * First call
2585  */
2586  CurrentRunningXacts->xids = (TransactionId *)
2588  if (CurrentRunningXacts->xids == NULL)
2589  ereport(ERROR,
2590  (errcode(ERRCODE_OUT_OF_MEMORY),
2591  errmsg("out of memory")));
2592  }
2593 
2594  xids = CurrentRunningXacts->xids;
2595 
2596  count = subcount = 0;
2597  suboverflowed = false;
2598 
2599  /*
2600  * Ensure that no xids enter or leave the procarray while we obtain
2601  * snapshot.
2602  */
2603  LWLockAcquire(ProcArrayLock, LW_SHARED);
2604  LWLockAcquire(XidGenLock, LW_SHARED);
2605 
2606  latestCompletedXid =
2608  oldestRunningXid =
2610 
2611  /*
2612  * Spin over procArray collecting all xids
2613  */
2614  for (index = 0; index < arrayP->numProcs; index++)
2615  {
2616  TransactionId xid;
2617 
2618  /* Fetch xid just once - see GetNewTransactionId */
2619  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2620 
2621  /*
2622  * We don't need to store transactions that don't have a TransactionId
2623  * yet because they will not show as running on a standby server.
2624  */
2625  if (!TransactionIdIsValid(xid))
2626  continue;
2627 
2628  /*
2629  * Be careful not to exclude any xids before calculating the values of
2630  * oldestRunningXid and suboverflowed, since these are used to clean
2631  * up transaction information held on standbys.
2632  */
2633  if (TransactionIdPrecedes(xid, oldestRunningXid))
2634  oldestRunningXid = xid;
2635 
2636  if (ProcGlobal->subxidStates[index].overflowed)
2637  suboverflowed = true;
2638 
2639  /*
2640  * If we wished to exclude xids this would be the right place for it.
2641  * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2642  * but they do during truncation at the end when they get the lock and
2643  * truncate, so it is not much of a problem to include them if they
2644  * are seen and it is cleaner to include them.
2645  */
2646 
2647  xids[count++] = xid;
2648  }
2649 
2650  /*
2651  * Spin over procArray collecting all subxids, but only if there hasn't
2652  * been a suboverflow.
2653  */
2654  if (!suboverflowed)
2655  {
2656  XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
2657 
2658  for (index = 0; index < arrayP->numProcs; index++)
2659  {
2660  int pgprocno = arrayP->pgprocnos[index];
2661  PGPROC *proc = &allProcs[pgprocno];
2662  int nsubxids;
2663 
2664  /*
2665  * Save subtransaction XIDs. Other backends can't add or remove
2666  * entries while we're holding XidGenLock.
2667  */
2668  nsubxids = other_subxidstates[index].count;
2669  if (nsubxids > 0)
2670  {
2671  /* barrier not really required, as XidGenLock is held, but ... */
2672  pg_read_barrier(); /* pairs with GetNewTransactionId */
2673 
 /* Subxids are appended after the top-level xids in the same array. */
2674  memcpy(&xids[count], (void *) proc->subxids.xids,
2675  nsubxids * sizeof(TransactionId));
2676  count += nsubxids;
2677  subcount += nsubxids;
2678 
2679  /*
2680  * Top-level XID of a transaction is always less than any of
2681  * its subxids, so we don't need to check if any of the
2682  * subxids are smaller than oldestRunningXid
2683  */
2684  }
2685  }
2686  }
2687 
2688  /*
2689  * It's important *not* to include the limits set by slots here because
2690  * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2691  * were to be included here the initial value could never increase because
2692  * of a circular dependency where slots only increase their limits when
2693  * running xacts increases oldestRunningXid and running xacts only
2694  * increases if slots do.
2695  */
2696 
 /* count covers both top-level xids and subxids, so subtract the latter. */
2697  CurrentRunningXacts->xcnt = count - subcount;
2698  CurrentRunningXacts->subxcnt = subcount;
2699  CurrentRunningXacts->subxid_overflow = suboverflowed;
2701  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2702  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2703 
2704  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2705  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2706  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2707 
2708  /* We don't release the locks here, the caller is responsible for that */
2709 
2710  return CurrentRunningXacts;
2711 }
2712 
2713 /*
2714  * GetOldestActiveTransactionId()
2715  *
2716  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2717  * all PGPROCs with an assigned TransactionId, even VACUUM processes.
2718  * We look at all databases, though there is no need to include WALSender
2719  * since this has no effect on hot standby conflicts.
2720  *
2721  * This is never executed during recovery so there is no need to look at
2722  * KnownAssignedXids.
2723  *
2724  * We don't worry about updating other counters, we want to keep this as
2725  * simple as possible and leave GetSnapshotData() as the primary code for
2726  * that bookkeeping.
 *
 * XidGenLock and ProcArrayLock are each taken in shared mode, one after the
 * other, and both are released before returning.
2727  */
2730 {
2731  ProcArrayStruct *arrayP = procArray;
2732  TransactionId *other_xids = ProcGlobal->xids;
2733  TransactionId oldestRunningXid;
2734  int index;
2735 
2737 
2738  /*
2739  * Read nextXid, as the upper bound of what's still active.
2740  *
2741  * Reading a TransactionId is atomic, but we must grab the lock to make
2742  * sure that all XIDs < nextXid are already present in the proc array (or
2743  * have already completed), when we spin over it.
2744  */
2745  LWLockAcquire(XidGenLock, LW_SHARED);
2747  LWLockRelease(XidGenLock);
2748 
2749  /*
2750  * Spin over procArray examining the top-level xid of every entry.
2751  */
2752  LWLockAcquire(ProcArrayLock, LW_SHARED);
2753  for (index = 0; index < arrayP->numProcs; index++)
2754  {
2755  TransactionId xid;
2756 
2757  /* Fetch xid just once - see GetNewTransactionId */
2758  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2759 
2760  if (!TransactionIdIsNormal(xid))
2761  continue;
2762 
2763  if (TransactionIdPrecedes(xid, oldestRunningXid))
2764  oldestRunningXid = xid;
2765 
2766  /*
2767  * Top-level XID of a transaction is always less than any of its
2768  * subxids, so we don't need to check if any of the subxids are
2769  * smaller than oldestRunningXid
2770  */
2771  }
2772  LWLockRelease(ProcArrayLock);
2773 
2774  return oldestRunningXid;
2775 }
2776 
2777 /*
2778  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2779  *
2780  * Returns the oldest xid that we can guarantee not to have been affected by
2781  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2782  * transaction aborted. Note that the value can (and most of the time will) be
2783  * much more conservative than what really has been affected by vacuum, but we
2784  * currently don't have better data available.
2785  *
2786  * This is useful to initialize the cutoff xid after which a new changeset
2787  * extraction replication slot can start decoding changes.
2788  *
 * If catalogOnly is true, the replication slots' catalog xmin horizon may be
 * used as the starting value too, not just their general xmin horizon.
 *
2789  * Must be called with ProcArrayLock held either shared or exclusively,
2790  * although most callers will want to use exclusive mode since it is expected
2791  * that the caller will immediately use the xid to peg the xmin horizon.
 * XidGenLock is acquired here in shared mode and released before returning.
2792  */
2795 {
2796  ProcArrayStruct *arrayP = procArray;
2797  TransactionId oldestSafeXid;
2798  int index;
2799  bool recovery_in_progress = RecoveryInProgress();
2800 
2801  Assert(LWLockHeldByMe(ProcArrayLock));
2802 
2803  /*
2804  * Acquire XidGenLock, so no transactions can acquire an xid while we're
2805  * running. If no transaction with xid were running concurrently a new xid
2806  * could influence the RecentXmin et al.
2807  *
2808  * We initialize the computation to nextXid since that's guaranteed to be
2809  * a safe, albeit pessimal, value.
2810  */
2811  LWLockAcquire(XidGenLock, LW_SHARED);
2813 
2814  /*
2815  * If there's already a slot pegging the xmin horizon, we can start with
2816  * that value, it's guaranteed to be safe since it's computed by this
2817  * routine initially and has been enforced since. We can always use the
2818  * slot's general xmin horizon, but the catalog horizon is only usable
2819  * when only catalog data is going to be looked at.
2820  */
2821  if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2823  oldestSafeXid))
2824  oldestSafeXid = procArray->replication_slot_xmin;
2825 
2826  if (catalogOnly &&
2829  oldestSafeXid))
2830  oldestSafeXid = procArray->replication_slot_catalog_xmin;
2831 
2832  /*
2833  * If we're not in recovery, we walk over the procarray and collect the
2834  * lowest xid. Since we're called with ProcArrayLock held and have
2835  * acquired XidGenLock, no entries can vanish concurrently, since
2836  * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
2837  * with ProcArrayLock held.
2838  *
2839  * In recovery we can't lower the safe value besides what we've computed
2840  * above, so we'll have to wait a bit longer there. We unfortunately can
2841  * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2842  * machinery can miss values and return an older value than is safe.
2843  */
2844  if (!recovery_in_progress)
2845  {
2846  TransactionId *other_xids = ProcGlobal->xids;
2847 
2848  /*
2849  * Spin over procArray collecting min(ProcGlobal->xids[i])
2850  */
2851  for (index = 0; index < arrayP->numProcs; index++)
2852  {
2853  TransactionId xid;
2854 
2855  /* Fetch xid just once - see GetNewTransactionId */
2856  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2857 
2858  if (!TransactionIdIsNormal(xid))
2859  continue;
2860 
2861  if (TransactionIdPrecedes(xid, oldestSafeXid))
2862  oldestSafeXid = xid;
2863  }
2864  }
2865 
 /* Only XidGenLock is released here; ProcArrayLock is left to the caller. */
2866  LWLockRelease(XidGenLock);
2867 
2868  return oldestSafeXid;
2869 }
2870 
2871 /*
2872  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2873  * delaying checkpoint because they have critical actions in progress.
2874  *
2875  * Constructs an array of VXIDs of transactions that are currently in commit
2876  * critical sections, as shown by having delayChkpt set in their PGPROC.
2877  *
2878  * Returns a palloc'd array that should be freed by the caller.
2879  * *nvxids is the number of valid entries.
2880  *
2881  * Note that because backends set or clear delayChkpt without holding any lock,
2882  * the result is somewhat indeterminate, but we don't really care. Even in
2883  * a multiprocessor with delayed writes to shared memory, it should be certain
2884  * that setting of delayChkpt will propagate to shared memory when the backend
2885  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2886  * it's already inserted its commit record. Whether it takes a little while
2887  * for clearing of delayChkpt to propagate is unimportant for correctness.
2888  */
2891 {
2892  VirtualTransactionId *vxids;
2893  ProcArrayStruct *arrayP = procArray;
2894  int count = 0;
2895  int index;
2896 
2897  /* allocate what's certainly enough result space */
2898  vxids = (VirtualTransactionId *)
2899  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2900 
2901  LWLockAcquire(ProcArrayLock, LW_SHARED);
2902 
2903  for (index = 0; index < arrayP->numProcs; index++)
2904  {
2905  int pgprocno = arrayP->pgprocnos[index];
2906  PGPROC *proc = &allProcs[pgprocno];
2907 
2908  if (proc->delayChkpt)
2909  {
2910  VirtualTransactionId vxid;
2911 
 /* Record the proc only if it currently has a valid VXID. */
2912  GET_VXID_FROM_PGPROC(vxid, *proc);
2913  if (VirtualTransactionIdIsValid(vxid))
2914  vxids[count++] = vxid;
2915  }
2916  }
2917 
2918  LWLockRelease(ProcArrayLock);
2919 
 /* Tell the caller how many entries of the result array were filled in. */
2920  *nvxids = count;
2921  return vxids;
2922 }
2923 
2924 /*
2925  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2926  *
2927  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2928  * of the specified VXIDs are still in critical sections of code.
2929  *
 * Returns true as soon as one proc with delayChkpt set has a valid VXID equal
 * to one of vxids[0 .. nvxids-1]; returns false if no such proc is found.
 * ProcArrayLock is held in shared mode during the search.
 *
2930  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2931  * those numbers should be small enough for it not to be a problem.
2932  */
2933 bool
2935 {
2936  bool result = false;
2937  ProcArrayStruct *arrayP = procArray;
2938  int index;
2939 
2940  LWLockAcquire(ProcArrayLock, LW_SHARED);
2941 
2942  for (index = 0; index < arrayP->numProcs; index++)
2943  {
2944  int pgprocno = arrayP->pgprocnos[index];
2945  PGPROC *proc = &allProcs[pgprocno];
2946  VirtualTransactionId vxid;
2947 
2948  GET_VXID_FROM_PGPROC(vxid, *proc);
2949 
2950  if (proc->delayChkpt && VirtualTransactionIdIsValid(vxid))
2951  {
2952  int i;
2953 
 /* Compare against each caller-supplied VXID; stop at the first hit. */
2954  for (i = 0; i < nvxids; i++)
2955  {
2956  if (VirtualTransactionIdEquals(vxid, vxids[i]))
2957  {
2958  result = true;
2959  break;
2960  }
2961  }
 /* One match is enough, so leave the outer loop as well. */
2962  if (result)
2963  break;
2964  }
2965  }
2966 
2967  LWLockRelease(ProcArrayLock);
2968 
2969  return result;
2970 }
2971 
2972 /*
2973  * BackendPidGetProc -- get a backend's PGPROC given its PID
2974  *
2975  * Returns NULL if not found. Note that it is up to the caller to be
2976  * sure that the question remains meaningful for long enough for the
2977  * answer to be used ...
 *
 * ProcArrayLock is taken in shared mode only for the duration of the lookup
 * and is released again before the result is returned.
2978  */
2979 PGPROC *
2981 {
2982  PGPROC *result;
2983 
2984  if (pid == 0) /* never match dummy PGPROCs */
2985  return NULL;
2986 
2987  LWLockAcquire(ProcArrayLock, LW_SHARED);
2988 
 /* The actual search is done by the lock-assuming variant. */
2989  result = BackendPidGetProcWithLock(pid);
2990 
2991  LWLockRelease(ProcArrayLock);
2992 
2993  return result;
2994 }
2995 
2996 /*
2997  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
2998  *
2999  * Same as above, except caller must be holding ProcArrayLock. The found
3000  * entry, if any, can be assumed to be valid as long as the lock remains held.
3001  */
3002 PGPROC *
3004 {
3005  PGPROC *result = NULL;
3006  ProcArrayStruct *arrayP = procArray;
3007  int index;
3008 
3009  if (pid == 0) /* never match dummy PGPROCs */
3010  return NULL;
3011 
3012  for (index = 0; index < arrayP->numProcs; index++)
3013  {
3014  PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
3015 
3016  if (proc->pid == pid)
3017  {
3018  result = proc;
3019  break;
3020  }
3021  }
3022 
3023  return result;
3024 }
3025 
3026 /*
3027  * BackendXidGetPid -- get a backend's pid given its XID
3028  *
3029  * Returns 0 if not found or it's a prepared transaction. Note that
3030  * it is up to the caller to be sure that the question remains
3031  * meaningful for long enough for the answer to be used ...
3032  *
3033  * Only main transaction Ids are considered. This function is mainly
3034  * useful for determining what backend owns a lock.
3035  *
3036  * Beware that not every xact has an XID assigned. However, as long as you
3037  * only call this using an XID found on disk, you're safe.
3038  */
3039 int
3041 {
3042  int result = 0;
3043  ProcArrayStruct *arrayP = procArray;
3044  TransactionId *other_xids = ProcGlobal->xids;
3045  int index;
3046 
3047  if (xid == InvalidTransactionId) /* never match invalid xid */
3048  return 0;
3049 
3050  LWLockAcquire(ProcArrayLock, LW_SHARED);
3051 
3052  for (index = 0; index < arrayP->numProcs; index++)
3053  {
3054  int pgprocno = arrayP->pgprocnos[index];
3055  PGPROC *proc = &allProcs[pgprocno];
3056 
3057  if (other_xids[index] == xid)
3058  {
3059  result = proc->pid;
3060  break;
3061  }
3062  }
3063 
3064  LWLockRelease(ProcArrayLock);
3065 
3066  return result;
3067 }
3068 
/*
 * IsBackendPid -- is a given pid a running backend
 *
 * Returns true exactly when BackendPidGetProc() finds a PGPROC for pid.
 *
 * This is not called by the backend, but is called by external modules.
 */
bool
IsBackendPid(int pid)
{
    return (BackendPidGetProc(pid) != NULL);
}
3079 
3080 
3081 /*
3082  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
3083  *
3084  * The array is palloc'd. The number of valid entries is returned into *nvxids.
3085  *
3086  * The arguments allow filtering the set of VXIDs returned. Our own process
3087  * is always skipped. In addition:
3088  * If limitXmin is not InvalidTransactionId, skip processes with
3089  * xmin > limitXmin.
3090  * If excludeXmin0 is true, skip processes with xmin = 0.
3091  * If allDbs is false, skip processes attached to other databases.
3092  * If excludeVacuum isn't zero, skip processes for which
3093  * (vacuumFlags & excludeVacuum) is not zero.
3094  *
3095  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
3096  * allow skipping backends whose oldest live snapshot is no older than
3097  * some snapshot we have. Since we examine the procarray with only shared
3098  * lock, there are race conditions: a backend could set its xmin just after
3099  * we look. Indeed, on multiprocessors with weak memory ordering, the
3100  * other backend could have set its xmin *before* we look. We know however
3101  * that such a backend must have held shared ProcArrayLock overlapping our
3102  * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
3103  * any snapshot the other backend is taking concurrently with our scan cannot
3104  * consider any transactions as still running that we think are committed
3105  * (since backends must hold ProcArrayLock exclusive to commit).
3106  */
3108 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
3109  bool allDbs, int excludeVacuum,
3110  int *nvxids)
3111 {
3112  VirtualTransactionId *vxids;
3113  ProcArrayStruct *arrayP = procArray;
3114  int count = 0;
3115  int index;
3116 
3117  /* allocate what's certainly enough result space */
3118  vxids = (VirtualTransactionId *)
3119  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
3120 
3121  LWLockAcquire(ProcArrayLock, LW_SHARED);
3122 
3123  for (index = 0; index < arrayP->numProcs; index++)
3124  {
3125  int pgprocno = arrayP->pgprocnos[index];
3126  PGPROC *proc = &allProcs[pgprocno];
3127  uint8 vacuumFlags = ProcGlobal->vacuumFlags[index];
3128 
3129  if (proc == MyProc)
3130  continue;
3131 
3132  if (excludeVacuum & vacuumFlags)
3133  continue;
3134 
3135  if (allDbs || proc->databaseId == MyDatabaseId)
3136  {
3137  /* Fetch xmin just once - might change on us */
3138  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3139 
3140  if (excludeXmin0 && !TransactionIdIsValid(pxmin))
3141  continue;
3142 
3143  /*
3144  * InvalidTransactionId precedes all other XIDs, so a proc that
3145  * hasn't set xmin yet will not be rejected by this test.
3146  */
3147  if (!TransactionIdIsValid(limitXmin) ||
3148  TransactionIdPrecedesOrEquals(pxmin, limitXmin))
3149  {
3150  VirtualTransactionId vxid;
3151 
3152  GET_VXID_FROM_PGPROC(vxid, *proc);
3153  if (VirtualTransactionIdIsValid(vxid))
3154  vxids[count++] = vxid;
3155  }
3156  }
3157  }
3158 
3159  LWLockRelease(ProcArrayLock);
3160 
3161  *nvxids = count;
3162  return vxids;
3163 }
3164 
3165 /*
3166  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
3167  *
3168  * Usage is limited to conflict resolution during recovery on standby servers.
3169  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
3170  * in cases where we cannot accurately determine a value for latestRemovedXid.
3171  *
3172  * If limitXmin is InvalidTransactionId then we want to kill everybody,
3173  * so we're not worried if they have a snapshot or not, nor does it really
3174  * matter what type of lock we hold.
3175  *
3176  * All callers that are checking xmins always now supply a valid and useful
3177  * value for limitXmin. The limitXmin is always lower than the lowest
3178  * numbered KnownAssignedXid that is not already a FATAL error. This is
3179  * because we only care about cleanup records that are cleaning up tuple
3180  * versions from committed transactions. In that case they will only occur
3181  * at the point where the record is less than the lowest running xid. That
3182  * allows us to say that if any backend takes a snapshot concurrently with
3183  * us then the conflict assessment made here would never include the snapshot
3184  * that is being derived. So we take LW_SHARED on the ProcArray and allow
3185  * concurrent snapshots when limitXmin is valid. We might think about adding
3186  * Assert(limitXmin < lowest(KnownAssignedXids))
3187  * but that would not be true in the case of FATAL errors lagging in array,
3188  * but we already know those are bogus anyway, so we skip that test.
3189  *
3190  * If dbOid is valid we skip backends attached to other databases.
3191  *
3192  * Be careful to *not* pfree the result from this function. We reuse
3193  * this array sufficiently often that we use malloc for the result.
3194  */
3197 {
3198  static VirtualTransactionId *vxids;
3199  ProcArrayStruct *arrayP = procArray;
3200  int count = 0;
3201  int index;
3202 
3203  /*
3204  * If first time through, get workspace to remember main XIDs in. We
3205  * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
3206  * result space, remembering room for a terminator.
3207  */
3208  if (vxids == NULL)
3209  {
3210  vxids = (VirtualTransactionId *)
3211  malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
3212  if (vxids == NULL)
3213  ereport(ERROR,
3214  (errcode(ERRCODE_OUT_OF_MEMORY),
3215  errmsg("out of memory")));
3216  }
3217 
3218  LWLockAcquire(ProcArrayLock, LW_SHARED);
3219 
3220  for (index = 0; index < arrayP->numProcs; index++)
3221  {
3222  int pgprocno = arrayP->pgprocnos[index];
3223  PGPROC *proc = &allProcs[pgprocno];
3224 
3225  /* Exclude prepared transactions */
3226  if (proc->pid == 0)
3227  continue;
3228 
3229  if (!OidIsValid(dbOid) ||
3230  proc->databaseId == dbOid)
3231  {
3232  /* Fetch xmin just once - can't change on us, but good coding */
3233  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3234 
3235  /*
3236  * We ignore an invalid pxmin because this means that backend has
3237  * no snapshot currently. We hold a Share lock to avoid contention
3238  * with users taking snapshots. That is not a problem because the
3239  * current xmin is always at least one higher than the latest
3240  * removed xid, so any new snapshot would never conflict with the
3241  * test here.
3242  */
3243  if (!TransactionIdIsValid(limitXmin) ||
3244  (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
3245  {
3246  VirtualTransactionId vxid;
3247 
3248  GET_VXID_FROM_PGPROC(vxid, *proc);
3249  if (VirtualTransactionIdIsValid(vxid))
3250  vxids[count++] = vxid;
3251  }
3252  }
3253  }
3254 
3255  LWLockRelease(ProcArrayLock);
3256 
3257  /* add the terminator */
3258  vxids[count].backendId = InvalidBackendId;
3260 
3261  return vxids;
3262 }
3263 
3264 /*
3265  * CancelVirtualTransaction - used in recovery conflict processing
3266  *
3267  * Returns pid of the process signaled, or 0 if not found.
3268  */
3269 pid_t
3271 {
3272  ProcArrayStruct *arrayP = procArray;
3273  int index;
3274  pid_t pid = 0;
3275 
3276  LWLockAcquire(ProcArrayLock, LW_SHARED);
3277 
3278  for (index = 0; index < arrayP->numProcs; index++)
3279  {
3280  int pgprocno = arrayP->pgprocnos[index];
3281  PGPROC *proc = &allProcs[pgprocno];
3282  VirtualTransactionId procvxid;
3283 
3284  GET_VXID_FROM_PGPROC(procvxid, *proc);
3285 
3286  if (procvxid.backendId == vxid.backendId &&
3287  procvxid.localTransactionId == vxid.localTransactionId)
3288  {
3289  proc->recoveryConflictPending = true;
3290  pid = proc->pid;
3291  if (pid != 0)
3292  {
3293  /*
3294  * Kill the pid if it's still here. If not, that's what we
3295  * wanted so ignore any errors.
3296  */
3297  (void) SendProcSignal(pid, sigmode, vxid.backendId);
3298  }
3299  break;
3300  }
3301  }
3302 
3303  LWLockRelease(ProcArrayLock);
3304 
3305  return pid;
3306 }
3307 
3308 /*
3309  * MinimumActiveBackends --- count backends (other than myself) that are
3310  * in active transactions. Return true if the count exceeds the
3311  * minimum threshold passed. This is used as a heuristic to decide if
3312  * a pre-XLOG-flush delay is worthwhile during commit.
3313  *
3314  * Do not count backends that are blocked waiting for locks, since they are
3315  * not going to get to run until someone else commits.
3316  */
3317 bool
3319 {
3320  ProcArrayStruct *arrayP = procArray;
3321  int count = 0;
3322  int index;
3323 
3324  /* Quick short-circuit if no minimum is specified */
3325  if (min == 0)
3326  return true;
3327 
3328  /*
3329  * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
3330  * bogus, but since we are only testing fields for zero or nonzero, it
3331  * should be OK. The result is only used for heuristic purposes anyway...
3332  */
3333  for (index = 0; index < arrayP->numProcs; index++)
3334  {
3335  int pgprocno = arrayP->pgprocnos[index];
3336  PGPROC *proc = &allProcs[pgprocno];
3337 
3338  /*
3339  * Since we're not holding a lock, need to be prepared to deal with
3340  * garbage, as someone could have incremented numProcs but not yet
3341  * filled the structure.
3342  *
3343  * If someone just decremented numProcs, 'proc' could also point to a
3344  * PGPROC entry that's no longer in the array. It still points to a
3345  * PGPROC struct, though, because freed PGPROC entries just go to the
3346  * free list and are recycled. Its contents are nonsense in that case,
3347  * but that's acceptable for this function.
3348  */
3349  if (pgprocno == -1)
3350  continue; /* do not count deleted entries */
3351  if (proc == MyProc)
3352  continue; /* do not count myself */
3353  if (proc->xid == InvalidTransactionId)
3354  continue; /* do not count if no XID assigned */
3355  if (proc->pid == 0)
3356  continue; /* do not count prepared xacts */
3357  if (proc->waitLock != NULL)
3358  continue; /* do not count if blocked on a lock */
3359  count++;
3360  if (count >= min)
3361  break;
3362  }
3363 
3364  return count >= min;
3365 }
3366 
3367 /*
3368  * CountDBBackends --- count backends that are using specified database
3369  */
3370 int
3372 {
3373  ProcArrayStruct *arrayP = procArray;
3374  int count = 0;
3375  int index;
3376 
3377  LWLockAcquire(ProcArrayLock, LW_SHARED);
3378 
3379  for (index = 0; index < arrayP->numProcs; index++)
3380  {
3381  int pgprocno = arrayP->pgprocnos[index];
3382  PGPROC *proc = &allProcs[pgprocno];
3383 
3384  if (proc->pid == 0)
3385  continue; /* do not count prepared xacts */
3386  if (!OidIsValid(databaseid) ||
3387  proc->databaseId == databaseid)
3388  count++;
3389  }
3390 
3391  LWLockRelease(ProcArrayLock);
3392 
3393  return count;
3394 }
3395 
3396 /*
3397  * CountDBConnections --- counts database backends ignoring any background
3398  * worker processes
3399  */
3400 int
3402 {
3403  ProcArrayStruct *arrayP = procArray;
3404  int count = 0;
3405  int index;
3406 
3407  LWLockAcquire(ProcArrayLock, LW_SHARED);
3408 
3409  for (index = 0; index < arrayP->numProcs; index++)
3410  {
3411  int pgprocno = arrayP->pgprocnos[index];
3412  PGPROC *proc = &allProcs[pgprocno];
3413 
3414  if (proc->pid == 0)
3415  continue; /* do not count prepared xacts */
3416  if (proc->isBackgroundWorker)
3417  continue; /* do not count background workers */
3418  if (!OidIsValid(databaseid) ||
3419  proc->databaseId == databaseid)
3420  count++;
3421  }
3422 
3423  LWLockRelease(ProcArrayLock);
3424 
3425  return count;
3426 }
3427 
3428 /*
3429  * CancelDBBackends --- cancel backends that are using specified database
3430  */
3431 void
3432 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
3433 {
3434  ProcArrayStruct *arrayP = procArray;
3435  int index;
3436 
3437  /* tell all backends to die */
3438  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3439 
3440  for (index = 0; index < arrayP->numProcs; index++)
3441  {
3442  int pgprocno = arrayP->pgprocnos[index];
3443  PGPROC *proc = &allProcs[pgprocno];
3444 
3445  if (databaseid == InvalidOid || proc->databaseId == databaseid)
3446  {
3447  VirtualTransactionId procvxid;
3448  pid_t pid;
3449 
3450  GET_VXID_FROM_PGPROC(procvxid, *proc);
3451 
3452  proc->recoveryConflictPending = conflictPending;
3453  pid = proc->pid;
3454  if (pid != 0)
3455  {
3456  /*
3457  * Kill the pid if it's still here. If not, that's what we
3458  * wanted so ignore any errors.
3459  */
3460  (void) SendProcSignal(pid, sigmode, procvxid.backendId);
3461  }
3462  }
3463  }
3464 
3465  LWLockRelease(ProcArrayLock);
3466 }
3467 
3468 /*
3469  * CountUserBackends --- count backends that are used by specified user
3470  */
3471 int
3473 {
3474  ProcArrayStruct *arrayP = procArray;
3475  int count = 0;
3476  int index;
3477 
3478  LWLockAcquire(ProcArrayLock, LW_SHARED);
3479 
3480  for (index = 0; index < arrayP->numProcs; index++)
3481  {
3482  int pgprocno = arrayP->pgprocnos[index];
3483  PGPROC *proc = &allProcs[pgprocno];
3484 
3485  if (proc->pid == 0)
3486  continue; /* do not count prepared xacts */
3487  if (proc->isBackgroundWorker)
3488  continue; /* do not count background workers */
3489  if (proc->roleId == roleid)
3490  count++;
3491  }
3492 
3493  LWLockRelease(ProcArrayLock);
3494 
3495  return count;
3496 }
3497 
3498 /*
3499  * CountOtherDBBackends -- check for other backends running in the given DB
3500  *
3501  * If there are other backends in the DB, we will wait a maximum of 5 seconds
3502  * for them to exit. Autovacuum backends are encouraged to exit early by
3503  * sending them SIGTERM, but normal user backends are just waited for.
3504  *
3505  * The current backend is always ignored; it is caller's responsibility to
3506  * check whether the current backend uses the given DB, if it's important.
3507  *
3508  * Returns true if there are (still) other backends in the DB, false if not.
3509  * Also, *nbackends and *nprepared are set to the number of other backends
3510  * and prepared transactions in the DB, respectively.
3511  *
3512  * This function is used to interlock DROP DATABASE and related commands
3513  * against there being any active backends in the target DB --- dropping the
3514  * DB while active backends remain would be a Bad Thing. Note that we cannot
3515  * detect here the possibility of a newly-started backend that is trying to
3516  * connect to the doomed database, so additional interlocking is needed during
3517  * backend startup. The caller should normally hold an exclusive lock on the
3518  * target DB before calling this, which is one reason we mustn't wait
3519  * indefinitely.
3520  */
3521 bool
3522 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
3523 {
3524  ProcArrayStruct *arrayP = procArray;
3525 
3526 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
3527  int autovac_pids[MAXAUTOVACPIDS];
3528  int tries;
3529 
3530  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
3531  for (tries = 0; tries < 50; tries++)
3532  {
3533  int nautovacs = 0;
3534  bool found = false;
3535  int index;
3536 
3538 
3539  *nbackends = *nprepared = 0;
3540 
3541  LWLockAcquire(ProcArrayLock, LW_SHARED);
3542 
3543  for (index = 0; index < arrayP->numProcs; index++)
3544  {
3545  int pgprocno = arrayP->pgprocnos[index];
3546  PGPROC *proc = &allProcs[pgprocno];
3547  uint8 vacuumFlags = ProcGlobal->vacuumFlags[index];
3548 
3549  if (proc->databaseId != databaseId)
3550  continue;
3551  if (proc == MyProc)
3552  continue;
3553 
3554  found = true;
3555 
3556  if (proc->pid == 0)
3557  (*nprepared)++;
3558  else
3559  {
3560  (*nbackends)++;
3561  if ((vacuumFlags & PROC_IS_AUTOVACUUM) &&
3562  nautovacs < MAXAUTOVACPIDS)
3563  autovac_pids[nautovacs++] = proc->pid;
3564  }
3565  }
3566 
3567  LWLockRelease(ProcArrayLock);
3568 
3569  if (!found)
3570  return false; /* no conflicting backends, so done */
3571 
3572  /*
3573  * Send SIGTERM to any conflicting autovacuums before sleeping. We
3574  * postpone this step until after the loop because we don't want to
3575  * hold ProcArrayLock while issuing kill(). We have no idea what might
3576  * block kill() inside the kernel...
3577  */
3578  for (index = 0; index < nautovacs; index++)
3579  (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3580 
3581  /* sleep, then try again */
3582  pg_usleep(100 * 1000L); /* 100ms */
3583  }
3584 
3585  return true; /* timed out, still conflicts */
3586 }
3587 
3588 /*
3589  * Terminate existing connections to the specified database. This routine
3590  * is used by the DROP DATABASE command when user has asked to forcefully
3591  * drop the database.
3592  *
3593  * The current backend is always ignored; it is caller's responsibility to
3594  * check whether the current backend uses the given DB, if it's important.
3595  *
3596  * No connection is terminated if the target database has even one prepared
3597  * transaction; an error is raised instead.
3598  */
3599 void
3601 {
3602  ProcArrayStruct *arrayP = procArray;
3603  List *pids = NIL;
3604  int nprepared = 0;
3605  int i;
3606 
3607  LWLockAcquire(ProcArrayLock, LW_SHARED);
3608 
3609  for (i = 0; i < procArray->numProcs; i++)
3610  {
3611  int pgprocno = arrayP->pgprocnos[i];
3612  PGPROC *proc = &allProcs[pgprocno];
3613 
3614  if (proc->databaseId != databaseId)
3615  continue;
3616  if (proc == MyProc)
3617  continue;
3618 
3619  if (proc->pid != 0)
3620  pids = lappend_int(pids, proc->pid);
3621  else
3622  nprepared++;
3623  }
3624 
3625  LWLockRelease(ProcArrayLock);
3626 
3627  if (nprepared > 0)
3628  ereport(ERROR,
3629  (errcode(ERRCODE_OBJECT_IN_USE),
3630  errmsg("database \"%s\" is being used by prepared transactions",
3631  get_database_name(databaseId)),
3632  errdetail_plural("There is %d prepared transaction using the database.",
3633  "There are %d prepared transactions using the database.",
3634  nprepared,
3635  nprepared)));
3636 
3637  if (pids)
3638  {
3639  ListCell *lc;
3640 
3641  /*
3642  * Check whether we have the necessary rights to terminate other
3643  * sessions. We don't terminate any session until we ensure that we
3644  * have rights on all the sessions to be terminated. These checks are
3645  * the same as we do in pg_terminate_backend.
3646  *
3647  * In this case we don't raise some warnings - like "PID %d is not a
3648  * PostgreSQL server process", because for us already finished session
3649  * is not a problem.
3650  */
3651  foreach(lc, pids)
3652  {
3653  int pid = lfirst_int(lc);
3654  PGPROC *proc = BackendPidGetProc(pid);
3655 
3656  if (proc != NULL)
3657  {
3658  /* Only allow superusers to signal superuser-owned backends. */
3659  if (superuser_arg(proc->roleId) && !superuser())
3660  ereport(ERROR,
3661  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3662  errmsg("must be a superuser to terminate superuser process")));
3663 
3664  /* Users can signal backends they have role membership in. */
3665  if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3666  !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
3667  ereport(ERROR,
3668  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3669  errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
3670  }
3671  }
3672 
3673  /*
3674  * There's a race condition here: once we release the ProcArrayLock,
3675  * it's possible for the session to exit before we issue kill. That
3676  * race condition possibility seems too unlikely to worry about. See
3677  * pg_signal_backend.
3678  */
3679  foreach(lc, pids)
3680  {
3681  int pid = lfirst_int(lc);
3682  PGPROC *proc = BackendPidGetProc(pid);
3683 
3684  if (proc != NULL)
3685  {
3686  /*
3687  * If we have setsid(), signal the backend's whole process
3688  * group
3689  */
3690 #ifdef HAVE_SETSID
3691  (void) kill(-pid, SIGTERM);
3692 #else
3693  (void) kill(pid, SIGTERM);
3694 #endif
3695  }
3696  }
3697  }
3698 }
3699 
3700 /*
3701  * ProcArraySetReplicationSlotXmin
3702  *
3703  * Install limits to future computations of the xmin horizon to prevent vacuum
3704  * and HOT pruning from removing affected rows still needed by clients with
3705  * replication slots.
3706  */
3707 void
3709  bool already_locked)
3710 {
3711  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3712 
3713  if (!already_locked)
3714  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3715 
3716  procArray->replication_slot_xmin = xmin;
3717  procArray->replication_slot_catalog_xmin = catalog_xmin;
3718 
3719  if (!already_locked)
3720  LWLockRelease(ProcArrayLock);
3721 }
3722 
3723 /*
3724  * ProcArrayGetReplicationSlotXmin
3725  *
3726  * Return the current slot xmin limits. That's useful to be able to remove
3727  * data that's older than those limits.
3728  */
3729 void
3731  TransactionId *catalog_xmin)
3732 {
3733  LWLockAcquire(ProcArrayLock, LW_SHARED);
3734 
3735  if (xmin != NULL)
3736  *xmin = procArray->replication_slot_xmin;
3737 
3738  if (catalog_xmin != NULL)
3739  *catalog_xmin = procArray->replication_slot_catalog_xmin;
3740 
3741  LWLockRelease(ProcArrayLock);
3742 }
3743 
3744 /*
3745  * XidCacheRemoveRunningXids
3746  *
3747  * Remove a bunch of TransactionIds from the list of known-running
3748  * subtransactions for my backend. Both the specified xid and those in
3749  * the xids[] array (of length nxids) are removed from the subxids cache.
3750  * latestXid must be the latest XID among the group.
3751  */
3752 void
3754  int nxids, const TransactionId *xids,
3755  TransactionId latestXid)
3756 {
3757  int i,
3758  j;
3759  XidCacheStatus *mysubxidstat;
3760 
3762 
3763  /*
3764  * We must hold ProcArrayLock exclusively in order to remove transactions
3765  * from the PGPROC array. (See src/backend/access/transam/README.) It's
3766  * possible this could be relaxed since we know this routine is only used
3767  * to abort subtransactions, but pending closer analysis we'd best be
3768  * conservative.
3769  *
3770  * Note that we do not have to be careful about memory ordering of our own
3771  * reads wrt. GetNewTransactionId() here - only this process can modify
3772  * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
3773  * careful about our own writes being well ordered.
3774  */
3775  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3776 
3777  mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
3778 
3779  /*
3780  * Under normal circumstances xid and xids[] will be in increasing order,
3781  * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3782  * behavior when removing a lot of xids.
3783  */
3784  for (i = nxids - 1; i >= 0; i--)
3785  {
3786  TransactionId anxid = xids[i];
3787 
3788  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3789  {
3790  if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3791  {
3793  pg_write_barrier();
3794  mysubxidstat->count--;
3796  break;
3797  }
3798  }
3799 
3800  /*
3801  * Ordinarily we should have found it, unless the cache has
3802  * overflowed. However it's also possible for this routine to be
3803  * invoked multiple times for the same subtransaction, in case of an
3804  * error during AbortSubTransaction. So instead of Assert, emit a
3805  * debug warning.
3806  */
3807  if (j < 0 && !MyProc->subxidStatus.overflowed)
3808  elog(WARNING, "did not find subXID %u in MyProc", anxid);
3809  }
3810 
3811  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3812  {
3813  if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3814  {
3816  pg_write_barrier();
3817  mysubxidstat->count--;
3819  break;
3820  }
3821  }
3822  /* Ordinarily we should have found it, unless the cache has overflowed */
3823  if (j < 0 && !MyProc->subxidStatus.overflowed)
3824  elog(WARNING, "did not find subXID %u in MyProc", xid);
3825 
3826  /* Also advance global latestCompletedXid while holding the lock */
3827  MaintainLatestCompletedXid(latestXid);
3828 
3829  LWLockRelease(ProcArrayLock);
3830 }
3831 
3832 #ifdef XIDCACHE_DEBUG
3833 
3834 /*
3835  * Print stats about effectiveness of XID cache
3836  */
3837 static void
3838 DisplayXidCache(void)
3839 {
3840  fprintf(stderr,
3841  "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
3842  xc_by_recent_xmin,
3843  xc_by_known_xact,
3844  xc_by_my_xact,
3845  xc_by_latest_xid,
3846  xc_by_main_xid,
3847  xc_by_child_xid,
3848  xc_by_known_assigned,
3849  xc_no_overflow,
3850  xc_slow_answer);
3851 }
3852 #endif /* XIDCACHE_DEBUG */
3853 
3854 /*
3855  * If rel != NULL, return test state appropriate for relation, otherwise
3856  * return state usable for all relations. The latter may consider XIDs as
3857  * not-yet-visible-to-everyone that a state for a specific relation would
3858  * already consider visible-to-everyone.
3859  *
3860  * This needs to be called while a snapshot is active or registered, otherwise
3861  * there are wraparound and other dangers.
3862  *
3863  * See comment for GlobalVisState for details.
3864  */
3867 {
3868  bool need_shared;
3869  bool need_catalog;
3871 
3872  /* XXX: we should assert that a snapshot is pushed or registered */
3873  Assert(RecentXmin);
3874 
3875  if (!rel)
3876  need_shared = need_catalog = true;
3877  else
3878  {
3879  /*
3880  * Other kinds currently don't contain xids, nor always the necessary
3881  * logical decoding markers.
3882  */
3883  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
3884  rel->rd_rel->relkind == RELKIND_MATVIEW ||
3885  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
3886 
3887  need_shared = rel->rd_rel->relisshared || RecoveryInProgress();
3888  need_catalog = IsCatalogRelation(rel) || RelationIsAccessibleInLogicalDecoding(rel);
3889  }
3890 
3891  if (need_shared)
3892  state = &GlobalVisSharedRels;
3893  else if (need_catalog)
3894  state = &GlobalVisCatalogRels;
3895  else
3896  state = &GlobalVisDataRels;
3897 
3900 
3901  return state;
3902 }
3903 
3904 /*
3905  * Return true if it's worth updating the accurate maybe_needed boundary.
3906  *
3907  * As it is somewhat expensive to determine xmin horizons, we don't want to
3908  * repeatedly do so when there is a low likelihood of it being beneficial.
3909  *
3910  * The current heuristic is that we update only if RecentXmin has changed
3911  * since the last update. If the oldest currently running transaction has not
3912  * finished, it is unlikely that recomputing the horizon would be useful.
3913  */
3914 static bool
3916 {
3917  /* hasn't been updated yet */
3919  return true;
3920 
3921  /*
3922  * If the maybe_needed/definitely_needed boundaries are the same, it's
3923  * unlikely to be beneficial to refresh boundaries.
3924  */
3926  state->definitely_needed))
3927  return false;
3928 
3929  /* does the last snapshot built have a different xmin? */
3931 }
3932 
3933 static void
3935 {
3936  GlobalVisSharedRels.maybe_needed =
3938  horizons->shared_oldest_nonremovable);
3939  GlobalVisCatalogRels.maybe_needed =
3941  horizons->catalog_oldest_nonremovable);
3942  GlobalVisDataRels.maybe_needed =
3944  horizons->data_oldest_nonremovable);
3945 
3946  /*
3947  * In longer running transactions it's possible that transactions we
3948  * previously needed to treat as running aren't around anymore. So update
3949  * definitely_needed to not be earlier than maybe_needed.
3950  */
3951  GlobalVisSharedRels.definitely_needed =
3952  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
3953  GlobalVisSharedRels.definitely_needed);
3954  GlobalVisCatalogRels.definitely_needed =
3955  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
3956  GlobalVisCatalogRels.definitely_needed);
3957  GlobalVisDataRels.definitely_needed =
3958  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
3959  GlobalVisDataRels.definitely_needed);
3960 
3962 }
3963 
3964 /*
3965  * Update boundaries in GlobalVis{Shared,Catalog, Data}Rels
3966  * using ComputeXidHorizons().
3967  */
3968 static void
3970 {
3971  ComputeXidHorizonsResult horizons;
3972 
3973  /* updates the horizons as a side-effect */
3974  ComputeXidHorizons(&horizons);
3975 }
3976 
3977 /*
3978  * Return true if no snapshot still considers fxid to be running.
3979  *
3980  * The state passed needs to have been initialized for the relation fxid is
3981  * from (NULL is also OK), otherwise the result may not be correct.
3982  *
3983  * See comment for GlobalVisState for details.
3984  */
3985 bool
3987  FullTransactionId fxid)
3988 {
3989  /*
3990  * If fxid is older than maybe_needed bound, it definitely is visible to
3991  * everyone.
3992  */
3993  if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
3994  return true;
3995 
3996  /*
3997  * If fxid is >= definitely_needed bound, it is very likely to still be
3998  * considered running.
3999  */
4001  return false;
4002 
4003  /*
4004  * fxid is between maybe_needed and definitely_needed, i.e. there might or
4005  * might not exist a snapshot considering fxid running. If it makes sense,
4006  * update boundaries and recheck.
4007  */
4008  if (GlobalVisTestShouldUpdate(state))
4009  {
4010  GlobalVisUpdate();
4011 
4013 
4014  return FullTransactionIdPrecedes(fxid, state->maybe_needed);
4015  }
4016  else
4017  return false;
4018 }
4019 
4020 /*
4021  * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
4022  *
4023  * It is crucial that this only gets called for xids from a source that
4024  * protects against xid wraparounds (e.g. from a table and thus protected by
4025  * relfrozenxid).
4026  */
4027 bool
4029 {
4030  FullTransactionId fxid;
4031 
4032  /*
4033  * Convert 32 bit argument to FullTransactionId. We can do so safely
4034  * because we know the xid has to, at the very least, be between
4035  * [oldestXid, nextFullXid), i.e. within 2 billion of xid. To avoid taking
4036  * a lock to determine either, we can just compare with
4037  * state->definitely_needed, which was based on those values at the time
4038  * the current snapshot was built.
4039  */
4040  fxid = FullXidRelativeTo(state->definitely_needed, xid);
4041 
4042  return GlobalVisTestIsRemovableFullXid(state, fxid);
4043 }
4044 
4045 /*
4046  * Return FullTransactionId below which all transactions are not considered
4047  * running anymore.
4048  *
4049  * Note: This is less efficient than testing with
4050  * GlobalVisTestIsRemovableFullXid as it likely requires building an accurate
4051  * cutoff, even in the case all the XIDs compared with the cutoff are outside
4052  * [maybe_needed, definitely_needed).
4053  */
4056 {
4057  /* acquire accurate horizon if not already done */
4058  if (GlobalVisTestShouldUpdate(state))
4059  GlobalVisUpdate();
4060 
4061  return state->maybe_needed;
4062 }
4063 
4064 /* Convenience wrapper around GlobalVisTestNonRemovableFullHorizon */
4067 {
4068  FullTransactionId cutoff;
4069 
4070  cutoff = GlobalVisTestNonRemovableFullHorizon(state);
4071 
4072  return XidFromFullTransactionId(cutoff);
4073 }
4074 
4075 /*
4076  * Convenience wrapper around GlobalVisTestFor() and
4077  * GlobalVisTestIsRemovableFullXid(), see their comments.
4078  */
4079 bool
4081 {
4083 
4084  state = GlobalVisTestFor(rel);
4085 
4086  return GlobalVisTestIsRemovableFullXid(state, fxid);
4087 }
4088 
4089 /*
4090  * Convenience wrapper around GlobalVisTestFor() and
4091  * GlobalVisTestIsRemovableXid(), see their comments.
4092  */
4093 bool
4095 {
4097 
4098  state = GlobalVisTestFor(rel);
4099 
4100  return GlobalVisTestIsRemovableXid(state, xid);
4101 }
4102 
4103 /*
4104  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
4105  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
4106  *
4107  * Be very careful about when to use this function. It can only safely be used
4108  * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
4109  * rel. That can be guaranteed, e.g., if the caller ensures a snapshot is
4110  * held by the backend and xid is from a table (where vacuum/freezing ensures
4111  * the xid has to be within that range), or if xid is from the procarray and
4112  * thus prevents xid wraparound itself.
4113  */
4114 static inline FullTransactionId
4116 {
4117  TransactionId rel_xid = XidFromFullTransactionId(rel);
4118 
4120  Assert(TransactionIdIsValid(rel_xid));
4121 
4122  /* not guaranteed to find issues, but likely to catch mistakes */
4124 
4126  + (int32) (xid - rel_xid));
4127 }
4128 
4129 
4130 /* ----------------------------------------------
4131  * KnownAssignedTransactionIds sub-module
4132  * ----------------------------------------------
4133  */
4134 
4135 /*
4136  * In Hot Standby mode, we maintain a list of transactions that are (or were)
4137  * running on the primary at the current point in WAL. These XIDs must be
4138  * treated as running by standby transactions, even though they are not in
4139  * the standby server's PGPROC array.
4140  *
4141  * We record all XIDs that we know have been assigned. That includes all the
4142  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
4143  * been assigned. We can deduce the existence of unobserved XIDs because we
4144  * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
4145  * list expands as new XIDs are observed or inferred, and contracts when
4146  * transaction completion records arrive.
4147  *
4148  * During hot standby we do not fret too much about the distinction between
4149  * top-level XIDs and subtransaction XIDs. We store both together in the
4150  * KnownAssignedXids list. In backends, this is copied into snapshots in
4151  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
4152  * doesn't care about the distinction either. Subtransaction XIDs are
4153  * effectively treated as top-level XIDs and in the typical case pg_subtrans
4154  * links are *not* maintained (which does not affect visibility).
4155  *
4156  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
4157  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
4158  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
4159  * least once every PGPROC_MAX_CACHED_SUBXIDS subXIDs. When we receive one of these
4160  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
4161  * and then remove them from KnownAssignedXids. This prevents overflow of
4162  * KnownAssignedXids and snapshots, at the cost that status checks for these
4163  * subXIDs will take a slower path through TransactionIdIsInProgress().
4164  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
4165  * though it should be complete for top-level XIDs; this is the same situation
4166  * that holds with respect to the PGPROC entries in normal running.
4167  *
4168  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
4169  * that, similarly to tracking overflow of a PGPROC's subxids array. We do
4170  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
4171  * As long as that is within the range of interesting XIDs, we have to assume
4172  * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
4173  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
4174  * subXID arrives - that is not an error.)
4175  *
4176  * Should a backend on primary somehow disappear before it can write an abort
4177  * record, then we just leave those XIDs in KnownAssignedXids. They actually
4178  * aborted but we think they were running; the distinction is irrelevant
4179  * because either way any changes done by the transaction are not visible to
4180  * backends in the standby. We prune KnownAssignedXids when
4181  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
4182  * array due to such dead XIDs.
4183  */
4184 
4185 /*
4186  * RecordKnownAssignedTransactionIds
4187  * Record the given XID in KnownAssignedXids, as well as any preceding
4188  * unobserved XIDs.
4189  *
4190  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
4191  * associated with a transaction. Must be called for each record after we
4192  * have executed StartupCLOG() et al, since we call ExtendSUBTRANS() here.
4193  *
4194  * Called during recovery in analogy with and in place of GetNewTransactionId()
4195  */
4196 void
4198 {
4202 
4203  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
4204  xid, latestObservedXid);
4205 
4206  /*
4207  * When a newly observed xid arrives, it is frequently the case that it is
4208  * *not* the next xid in sequence. When this occurs, we must treat the
4209  * intervening xids as running also.
4210  */
4212  {
4213  TransactionId next_expected_xid;
4214 
4215  /*
4216  * Extend subtrans like we do in GetNewTransactionId() during normal
4217  * operation using individual extend steps. Note that we do not need
4218  * to extend clog since its extensions are WAL logged.
4219  *
4220  * This part has to be done regardless of standbyState since we
4221  * immediately start assigning subtransactions to their toplevel
4222  * transactions.
4223  */
4224  next_expected_xid = latestObservedXid;
4225  while (TransactionIdPrecedes(next_expected_xid, xid))
4226  {
4227  TransactionIdAdvance(next_expected_xid);
4228  ExtendSUBTRANS(next_expected_xid);
4229  }
4230  Assert(next_expected_xid == xid);
4231 
4232  /*
4233  * If the KnownAssignedXids machinery isn't up yet, there's nothing
4234  * more to do since we don't track assigned xids yet.
4235  */
4237  {
4238  latestObservedXid = xid;
4239  return;
4240  }
4241 
4242  /*
4243  * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
4244  */
4245  next_expected_xid = latestObservedXid;
4246  TransactionIdAdvance(next_expected_xid);
4247  KnownAssignedXidsAdd(next_expected_xid, xid, false);
4248 
4249  /*
4250  * Now we can advance latestObservedXid
4251  */
4252  latestObservedXid = xid;
4253 
4254  /* ShmemVariableCache->nextXid must be beyond any observed xid */
4256  next_expected_xid = latestObservedXid;
4257  TransactionIdAdvance(next_expected_xid);
4258  }
4259 }
4260 
4261 /*
4262  * ExpireTreeKnownAssignedTransactionIds
4263  * Remove the given XIDs from KnownAssignedXids.
4264  *
4265  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
4266  */
4267 void
4269  TransactionId *subxids, TransactionId max_xid)
4270 {
4272 
4273  /*
4274  * Uses same locking as transaction commit
4275  */
4276  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4277 
4278  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
4279 
4280  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
4282 
4283  LWLockRelease(ProcArrayLock);
4284 }
4285 
4286 /*
4287  * ExpireAllKnownAssignedTransactionIds
4288  * Remove all entries in KnownAssignedXids
4289  */
4290 void
4292 {
4293  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4295  LWLockRelease(ProcArrayLock);
4296 }
4297 
4298 /*
4299  * ExpireOldKnownAssignedTransactionIds
4300  * Remove KnownAssignedXids entries preceding the given XID
4301  */
4302 void
4304 {
4305  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4307  LWLockRelease(ProcArrayLock);
4308 }
4309 
4310 
4311 /*
4312  * Private module functions to manipulate KnownAssignedXids
4313  *
4314  * There are 5 main uses of the KnownAssignedXids data structure:
4315  *
4316  * * backends taking snapshots - all valid XIDs need to be copied out
4317  * * backends seeking to determine presence of a specific XID
4318  * * startup process adding new known-assigned XIDs
4319  * * startup process removing specific XIDs as transactions end
4320  * * startup process pruning array when special WAL records arrive
4321  *
4322  * This data structure is known to be a hot spot during Hot Standby, so we
4323  * go to some lengths to make these operations as efficient and as concurrent
4324  * as possible.
4325  *
4326  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
4327  * order, to be exact --- to allow binary search for specific XIDs. Note:
4328  * in general TransactionIdPrecedes would not provide a total order, but
4329  * we know that the entries present at any instant should not extend across
4330  * a large enough fraction of XID space to wrap around (the primary would
4331  * shut down for fear of XID wrap long before that happens). So it's OK to
4332  * use TransactionIdPrecedes as a binary-search comparator.
4333  *
4334  * It's cheap to maintain the sortedness during insertions, since new known
4335  * XIDs are always reported in XID order; we just append them at the right.
4336  *
4337  * To keep individual deletions cheap, we need to allow gaps in the array.
4338  * This is implemented by marking array elements as valid or invalid using
4339  * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
4340  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
4341  * XID entry itself. This preserves the property that the XID entries are
4342  * sorted, so we can do binary searches easily. Periodically we compress
4343  * out the unused entries; that's much cheaper than having to compress the
4344  * array immediately on every deletion.
4345  *
4346  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
4347  * are those with indexes tail <= i < head; items outside this subscript range
4348  * have unspecified contents. When head reaches the end of the array, we
4349  * force compression of unused entries rather than wrapping around, since
4350  * allowing wraparound would greatly complicate the search logic. We maintain
4351  * an explicit tail pointer so that pruning of old XIDs can be done without
4352  * immediately moving the array contents. In most cases only a small fraction
4353  * of the array contains valid entries at any instant.
4354  *
4355  * Although only the startup process can ever change the KnownAssignedXids
4356  * data structure, we still need interlocking so that standby backends will
4357  * not observe invalid intermediate states. The convention is that backends
4358  * must hold shared ProcArrayLock to examine the array. To remove XIDs from
4359  * the array, the startup process must hold ProcArrayLock exclusively, for
4360  * the usual transactional reasons (compare commit/abort of a transaction
4361  * during normal running). Compressing unused entries out of the array
4362  * likewise requires exclusive lock. To add XIDs to the array, we just insert
4363  * them into slots to the right of the head pointer and then advance the head
4364  * pointer. This wouldn't require any lock at all, except that on machines
4365  * with weak memory ordering we need to be careful that other processors
4366  * see the array element changes before they see the head pointer change.
4367  * We handle this by using a spinlock to protect reads and writes of the
4368  * head/tail pointers. (We could dispense with the spinlock if we were to
4369  * create suitable memory access barrier primitives and use those instead.)
4370  * The spinlock must be taken to read or write the head/tail pointers unless
4371  * the caller holds ProcArrayLock exclusively.
4372  *
4373  * Algorithmic analysis:
4374  *
4375  * If we have a maximum of M slots, with N XIDs currently spread across
4376  * S elements then we have N <= S <= M always.
4377  *
4378  * * Adding a new XID is O(1) and needs little locking (unless compression
4379  * must happen)
4380  * * Compressing the array is O(S) and requires exclusive lock
4381  * * Removing an XID is O(logS) and requires exclusive lock
4382  * * Taking a snapshot is O(S) and requires shared lock
4383  * * Checking for an XID is O(logS) and requires shared lock
4384  *
4385  * In comparison, using a hash table for KnownAssignedXids would mean that
4386  * taking snapshots would be O(M). If we can maintain S << M then the
4387  * sorted array technique will deliver significantly faster snapshots.
4388  * If we try to keep S too small then we will spend too much time compressing,
4389  * so there is an optimal point for any workload mix. We use a heuristic to
4390  * decide when to compress the array, though trimming also helps reduce
4391  * frequency of compressing. The heuristic requires us to track the number of
4392  * currently valid XIDs in the array.
4393  */
4394 
4395 
4396 /*
4397  * Compress KnownAssignedXids by shifting valid data down to the start of the
4398  * array, removing any gaps.
4399  *
4400  * A compression step is forced if "force" is true, otherwise we do it
4401  * only if a heuristic indicates it's a good time to do it.
4402  *
4403  * Caller must hold ProcArrayLock in exclusive mode.
4404  */
4405 static void
4407 {
4408  ProcArrayStruct *pArray = procArray;
4409  int head,
4410  tail;
4411  int compress_index;
4412  int i;
4413 
4414  /* no spinlock required since we hold ProcArrayLock exclusively */
4415  head = pArray->headKnownAssignedXids;
4416  tail = pArray->tailKnownAssignedXids;
4417 
4418  if (!force)
4419  {
4420  /*
4421  * If we can choose how much to compress, use a heuristic to avoid
4422  * compressing too often or not often enough.
4423  *
4424  * Heuristic is if we have a large enough current spread and less than
4425  * 50% of the elements are currently in use, then compress. This
4426  * should ensure we compress fairly infrequently. We could compress
4427  * less often though the virtual array would spread out more and
4428  * snapshots would become more expensive.
4429  */
4430  int nelements = head - tail;
4431 
4432  if (nelements < 4 * PROCARRAY_MAXPROCS ||
4433  nelements < 2 * pArray->numKnownAssignedXids)
4434  return;
4435  }
4436 
4437  /*
4438  * We compress the array by reading the valid values from tail to head,
4439  * re-aligning data to 0th element.
4440  */
4441  compress_index = 0;
4442  for (i = tail; i < head; i++)
4443  {
4444  if (KnownAssignedXidsValid[i])
4445  {
4446  KnownAssignedXids[compress_index] = KnownAssignedXids[i];
4447  KnownAssignedXidsValid[compress_index] = true;
4448  compress_index++;
4449  }
4450  }
4451 
4452  pArray->tailKnownAssignedXids = 0;
4453  pArray->headKnownAssignedXids = compress_index;
4454 }
4455 
4456 /*
4457  * Add xids into KnownAssignedXids at the head of the array.
4458  *
4459  * xids from from_xid to to_xid, inclusive, are added to the array.
4460  *
4461  * If exclusive_lock is true then caller already holds ProcArrayLock in
4462  * exclusive mode, so we need no extra locking here. Else caller holds no
4463  * lock, so we need to be sure we maintain sufficient interlocks against
4464  * concurrent readers. (Only the startup process ever calls this, so no need
4465  * to worry about concurrent writers.)
4466  */
4467 static void
4469  bool exclusive_lock)
4470 {
4471  ProcArrayStruct *pArray = procArray;
4472  TransactionId next_xid;
4473  int head,
4474  tail;
4475  int nxids;
4476  int i;
4477 
4478  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
4479 
4480  /*
4481  * Calculate how many array slots we'll need. Normally this is cheap; in
4482  * the unusual case where the XIDs cross the wrap point, we do it the hard
4483  * way.
4484  */
4485  if (to_xid >= from_xid)
4486  nxids = to_xid - from_xid + 1;
4487  else
4488  {
4489  nxids = 1;
4490  next_xid = from_xid;
4491  while (TransactionIdPrecedes(next_xid, to_xid))
4492  {
4493  nxids++;
4494  TransactionIdAdvance(next_xid);
4495  }
4496  }
4497 
4498  /*
4499  * Since only the startup process modifies the head/tail pointers, we
4500  * don't need a lock to read them here.
4501  */
4502  head = pArray->headKnownAssignedXids;
4503  tail = pArray->tailKnownAssignedXids;
4504 
4505  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
4506  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
4507 
4508  /*
4509  * Verify that insertions occur in TransactionId sequence. Note that even
4510  * if the last existing element is marked invalid, it must still have a
4511  * correctly sequenced XID value.
4512  */
4513  if (head > tail &&
4514  TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
4515  {
4517  elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
4518  }
4519 
4520  /*
4521  * If our xids won't fit in the remaining space, compress out free space
4522  */
4523  if (head + nxids > pArray->maxKnownAssignedXids)
4524  {
4525  /* must hold lock to compress */
4526  if (!exclusive_lock)
4527  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4528 
4530 
4531  head = pArray->headKnownAssignedXids;
4532  /* note: we no longer care about the tail pointer */
4533 
4534  if (!exclusive_lock)
4535  LWLockRelease(ProcArrayLock);
4536 
4537  /*
4538  * If it still won't fit then we're out of memory
4539  */
4540  if (head + nxids > pArray->maxKnownAssignedXids)
4541  elog(ERROR, "too many KnownAssignedXids");
4542  }
4543 
4544  /* Now we can insert the xids into the space starting at head */
4545  next_xid = from_xid;
4546  for (i = 0; i < nxids; i++)
4547  {
4548  KnownAssignedXids[head] = next_xid;
4549  KnownAssignedXidsValid[head] = true;
4550  TransactionIdAdvance(next_xid);
4551  head++;
4552  }
4553 
4554  /* Adjust count of number of valid entries */
4555  pArray->numKnownAssignedXids += nxids;
4556 
4557  /*
4558  * Now update the head pointer. We use a spinlock to protect this
4559  * pointer, not because the update is likely to be non-atomic, but to
4560  * ensure that other processors see the above array updates before they
4561  * see the head pointer change.
4562  *
4563  * If we're holding ProcArrayLock exclusively, there's no need to take the
4564  * spinlock.
4565  */
4566  if (exclusive_lock)
4567  pArray->headKnownAssignedXids = head;
4568  else
4569  {
4571  pArray->headKnownAssignedXids = head;
4573  }
4574 }
4575 
4576 /*
4577  * KnownAssignedXidsSearch
4578  *
4579  * Searches KnownAssignedXids for a specific xid and optionally removes it.
4580  * Returns true if it was found, false if not.
4581  *
4582  * Caller must hold ProcArrayLock in shared or exclusive mode.
4583  * Exclusive lock must be held for remove = true.
4584  */
4585 static bool
4587 {
4588  ProcArrayStruct *pArray = procArray;
4589  int first,
4590  last;
4591  int head;
4592  int tail;
4593  int result_index = -1;
4594 
4595  if (remove)
4596  {
4597  /* we hold ProcArrayLock exclusively, so no need for spinlock */
4598  tail = pArray->tailKnownAssignedXids;
4599  head = pArray->headKnownAssignedXids;
4600  }
4601  else
4602  {
4603  /* take spinlock to ensure we see up-to-date array contents */
4605  tail = pArray->tailKnownAssignedXids;
4606  head = pArray->headKnownAssignedXids;
4608  }
4609 
4610  /*
4611  * Standard binary search. Note we can ignore the KnownAssignedXidsValid
4612  * array here, since even invalid entries will contain sorted XIDs.
4613  */
4614  first = tail;
4615  last = head - 1;
4616  while (first <= last)
4617  {
4618  int mid_index;
4619  TransactionId mid_xid;
4620 
4621  mid_index = (first + last) / 2;
4622  mid_xid = KnownAssignedXids[mid_index];
4623 
4624  if (xid == mid_xid)
4625  {
4626  result_index = mid_index;
4627  break;
4628  }
4629  else if (TransactionIdPrecedes(xid, mid_xid))
4630  last = mid_index - 1;
4631  else
4632  first = mid_index + 1;
4633  }
4634 
4635  if (result_index < 0)
4636  return false; /* not in array */
4637 
4638  if (!KnownAssignedXidsValid[result_index])
4639  return false; /* in array, but invalid */
4640 
4641  if (remove)
4642  {
4643  KnownAssignedXidsValid[result_index] = false;
4644 
4645  pArray->numKnownAssignedXids--;
4646  Assert(pArray->numKnownAssignedXids >= 0);
4647 
4648  /*
4649  * If we're removing the tail element then advance tail pointer over
4650  * any invalid elements. This will speed future searches.
4651  */
4652  if (result_index == tail)
4653  {
4654  tail++;
4655  while (tail < head && !KnownAssignedXidsValid[tail])
4656  tail++;
4657  if (tail >= head)
4658  {
4659  /* Array is empty, so we can reset both pointers */
4660  pArray->headKnownAssignedXids = 0;
4661  pArray->tailKnownAssignedXids = 0;
4662  }
4663  else
4664  {
4665  pArray->tailKnownAssignedXids = tail;
4666  }
4667  }
4668  }
4669 
4670  return true;
4671 }
4672 
4673 /*
4674  * Is the specified XID present in KnownAssignedXids[]?
4675  *
4676  * Caller must hold ProcArrayLock in shared or exclusive mode.
4677  */
4678 static bool
4680 {
4682 
4683  return KnownAssignedXidsSearch(xid, false);
4684 }
4685 
4686 /*
4687  * Remove the specified XID from KnownAssignedXids[].
4688  *
4689  * Caller must hold ProcArrayLock in exclusive mode.
4690  */
4691 static void
4693 {
4695 
4696  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
4697 
4698  /*
4699  * Note: we cannot consider it an error to remove an XID that's not
4700  * present. We intentionally remove subxact IDs while processing
4701  * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
4702  * removed again when the top-level xact commits or aborts.
4703  *
4704  * It might be possible to track such XIDs to distinguish this case from
4705  * actual errors, but it would be complicated and probably not worth it.
4706  * So, just ignore the search result.
4707  */
4708  (void) KnownAssignedXidsSearch(xid, true);
4709 }
4710 
4711 /*
4712  * KnownAssignedXidsRemoveTree
4713  * Remove xid (if it's not InvalidTransactionId) and all the subxids.
4714  *
4715  * Caller must hold ProcArrayLock in exclusive mode.
4716  */
4717 static void
4719  TransactionId *subxids)
4720 {
4721  int i;
4722 
4723  if (TransactionIdIsValid(xid))
4725 
4726  for (i = 0; i < nsubxids; i++)
4727  KnownAssignedXidsRemove(subxids[i]);
4728 
4729  /* Opportunistically compress the array */
4731 }
4732 
4733 /*
4734  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
4735  * then clear the whole table.
4736  *
4737  * Caller must hold ProcArrayLock in exclusive mode.
4738  */
4739 static void
4741 {
4742  ProcArrayStruct *pArray = procArray;
4743  int count = 0;
4744  int head,
4745  tail,
4746  i;
4747 
4748  if (!TransactionIdIsValid(removeXid))
4749  {
4750  elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
4751  pArray->numKnownAssignedXids = 0;
4752  pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
4753  return;
4754  }
4755 
4756  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
4757 
4758  /*
4759  * Mark entries invalid starting at the tail. Since array is sorted, we
4760  * can stop as soon as we reach an entry >= removeXid.
4761  */
4762  tail = pArray->tailKnownAssignedXids;
4763  head = pArray->headKnownAssignedXids;
4764 
4765  for (i = tail; i < head; i++)
4766  {
4767  if (KnownAssignedXidsValid[i])
4768  {
4769  TransactionId knownXid = KnownAssignedXids[i];
4770 
4771  if (TransactionIdFollowsOrEquals(knownXid, removeXid))
4772  break;
4773 
4774  if (!StandbyTransactionIdIsPrepared(knownXid))
4775  {
4776  KnownAssignedXidsValid[i] = false;
4777  count++;
4778  }
4779  }
4780  }
4781 
4782  pArray->numKnownAssignedXids -= count;
4783  Assert(pArray->numKnownAssignedXids >= 0);
4784 
4785  /*
4786  * Advance the tail pointer if we've marked the tail item invalid.
4787  */
4788  for (i = tail; i < head; i++)
4789  {
4790  if (KnownAssignedXidsValid[i])
4791  break;
4792  }
4793  if (i >= head)
4794  {
4795  /* Array is empty, so we can reset both pointers */
4796  pArray->headKnownAssignedXids = 0;
4797  pArray->tailKnownAssignedXids = 0;
4798  }
4799  else
4800  {
4801  pArray->tailKnownAssignedXids = i;
4802  }
4803 
4804  /* Opportunistically compress the array */
4806 }
4807 
4808 /*
4809  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
4810  * We filter out anything >= xmax.
4811  *
4812  * Returns the number of XIDs stored into xarray[]. Caller is responsible
4813  * that array is large enough.
4814  *
4815  * Caller must hold ProcArrayLock in (at least) shared mode.
4816  */
4817 static int
4819 {
4821 
4822  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
4823 }
4824 
4825 /*
4826  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
4827  * we reduce *xmin to the lowest xid value seen if not already lower.
4828  *
4829  * Caller must hold ProcArrayLock in (at least) shared mode.
4830  */
4831 static int
4833  TransactionId xmax)
4834 {
4835  int count = 0;
4836  int head,
4837  tail;
4838  int i;
4839 
4840  /*
4841  * Fetch head just once, since it may change while we loop. We can stop
4842  * once we reach the initially seen head, since we are certain that an xid
4843  * cannot enter and then leave the array while we hold ProcArrayLock. We
4844  * might miss newly-added xids, but they should be >= xmax so irrelevant
4845  * anyway.
4846  *
4847  * Must take spinlock to ensure we see up-to-date array contents.
4848  */
4850  tail = procArray->tailKnownAssignedXids;
4851  head = procArray->headKnownAssignedXids;
4853 
4854  for (i = tail; i < head; i++)
4855  {
4856  /* Skip any gaps in the array */
4857  if (KnownAssignedXidsValid[i])
4858  {
4859  TransactionId knownXid = KnownAssignedXids[i];
4860 
4861  /*
4862  * Update xmin if required. Only the first XID need be checked,
4863  * since the array is sorted.
4864  */
4865  if (count == 0 &&
4866  TransactionIdPrecedes(knownXid, *xmin))
4867  *xmin = knownXid;
4868 
4869  /*
4870  * Filter out anything >= xmax, again relying on sorted property
4871  * of array.
4872  */
4873  if (TransactionIdIsValid(xmax) &&
4874  TransactionIdFollowsOrEquals(knownXid, xmax))
4875  break;
4876 
4877  /* Add knownXid into output array */
4878  xarray[count++] = knownXid;
4879  }
4880  }
4881 
4882  return count;
4883 }
4884 
4885 /*
4886  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
4887  * if nothing there.
4888  */
4889 static TransactionId
4891 {
4892  int head,
4893  tail;
4894  int i;
4895 
4896  /*
4897  * Fetch head just once, since it may change while we loop.
4898  */
4900  tail = procArray->tailKnownAssignedXids;
4901  head = procArray->headKnownAssignedXids;
4903 
4904  for (i = tail; i < head; i++)
4905  {
4906  /* Skip any gaps in the array */
4907  if (KnownAssignedXidsValid[i])
4908  return KnownAssignedXids[i];
4909  }
4910 
4911  return InvalidTransactionId;
4912 }
4913 
4914 /*
4915  * Display KnownAssignedXids to provide debug trail
4916  *
4917  * Currently this is only called within startup process, so we need no
4918  * special locking.
4919  *
4920  * Note this is pretty expensive, and much of the expense will be incurred
4921  * even if the elog message will get discarded. It's not currently called
4922  * in any performance-critical places, however, so no need to optimize it.
4923  */
4924 static void
4926 {
4927  ProcArrayStruct *pArray = procArray;
4929  int head,
4930  tail,
4931  i;
4932  int nxids = 0;
4933 
4934  tail = pArray->tailKnownAssignedXids;
4935  head = pArray->headKnownAssignedXids;
4936 
4937  initStringInfo(&buf);
4938 
4939  for (i = tail; i < head; i++)
4940  {
4941  if (KnownAssignedXidsValid[i])
4942  {
4943  nxids++;
4944  appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
4945  }
4946  }
4947 
4948  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
4949  nxids,
4950  pArray->numKnownAssignedXids,
4951  pArray->tailKnownAssignedXids,
4952  pArray->headKnownAssignedXids,
4953  buf.data);
4954 
4955  pfree(buf.data);
4956 }
4957 
4958 /*
4959  * KnownAssignedXidsReset
4960  * Resets KnownAssignedXids to be empty
4961  */
4962 static void
4964 {
4965  ProcArrayStruct *pArray = procArray;
4966 
4967  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4968 
4969  pArray->numKnownAssignedXids = 0;
4970  pArray->tailKnownAssignedXids = 0;
4971  pArray->headKnownAssignedXids = 0;
4972 
4973  LWLockRelease(ProcArrayLock);
4974 }
#define TransactionIdAdvance(dest)
Definition: transam.h:91
int slock_t
Definition: s_lock.h:934
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:985
#define NIL
Definition: pg_list.h:65
#define AmStartupProcess()
Definition: miscadmin.h:431
static TransactionId latestObservedXid
Definition: procarray.c:249
TransactionId oldest_considered_running
Definition: procarray.c:204
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:3108
TransactionId oldestRunningXid
Definition: standby.h:76
bool procArrayGroupMember
Definition: proc.h:204
uint64 snapXactCompletionCount
Definition: snapshot.h:216
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3270
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:98
#define PROCARRAY_MAXPROCS
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2794
static void ComputeXidHorizons(ComputeXidHorizonsResult *h)
Definition: procarray.c:1641
FullTransactionId latest_completed
Definition: procarray.c:184
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3371
#define DEBUG1
Definition: elog.h:25
#define likely(x)
Definition: c.h:205
TransactionId shared_oldest_nonremovable_raw
Definition: procarray.c:224
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:4925
#define GET_VXID_FROM_PGPROC(vxid, proc)
Definition: lock.h:79
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:57
BackendId backendId
Definition: proc.h:144
uint32 TransactionId
Definition: c.h:520
bool copied
Definition: snapshot.h:185
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:277
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
#define DEBUG3
Definition: elog.h:23
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:869
Oid GetUserId(void)
Definition: miscinit.c:476
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
XidCacheStatus * subxidStates
Definition: proc.h:315
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1928
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
static void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:670
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:2980
#define FullTransactionIdIsValid(x)
Definition: transam.h:55
PGPROC * MyProc
Definition: proc.c:67
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1305
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:365
int vacuum_defer_cleanup_age
Definition: standby.c:39
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:69
#define SpinLockInit(lock)
Definition: spin.h:60
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4892
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3196
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:98
XLogRecPtr lsn
Definition: snapshot.h:209
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2890
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:724
unsigned char uint8
Definition: c.h:372
static FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
Definition: procarray.c:4115
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:283
#define xc_by_my_xact_inc()
Definition: procarray.c:302
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid)
Definition: procarray.c:920
Oid roleId
Definition: proc.h:146
TransactionId oldestXid
Definition: transam.h:215
int errcode(int sqlerrcode)
Definition: elog.c:610
TransactionId RecentXmin
Definition: snapmgr.c:113
uint64 xactCompletionCount
Definition: transam.h:241
slock_t known_assigned_xids_lck
Definition: procarray.c:84
bool superuser(void)
Definition: superuser.c:46
PROC_HDR * ProcGlobal
Definition: proc.c:79
bool suboverflowed
Definition: snapshot.h:182
TransactionId * xids
Definition: standby.h:79
#define kill(pid, sig)
Definition: win32_port.h:426
bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid)
Definition: procarray.c:3986
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
FullTransactionId latestCompletedXid
Definition: transam.h:231
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:8074
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition: transam.h:81
#define TransactionIdRetreat(dest)
Definition: transam.h:141
LocalTransactionId localTransactionId
Definition: lock.h:65
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:609
#define xc_by_child_xid_inc()
Definition: procarray.c:305
bool TransactionIdIsKnownCompleted(TransactionId transactionId)
Definition: transam.c:238
#define DEBUG4
Definition: elog.h:22
#define fprintf
Definition: port.h:197
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:4268
#define MAXAUTOVACPIDS
FullTransactionId nextXid
Definition: transam.h:213
uint32 regd_count
Definition: snapshot.h:205
#define OidIsValid(objectId)
Definition: c.h:651
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3730
TransactionId catalog_oldest_nonremovable
Definition: procarray.c:230
XidCacheStatus subxidStatus
Definition: proc.h:198
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4291
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4028
signed int int32
Definition: c.h:362
int trace_recovery(int trace_level)
Definition: elog.c:3543
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:61
bool overflowed
Definition: proc.h:43
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TransactionId TransactionXmin
Definition: snapmgr.c:112
TransactionId latestCompletedXid
Definition: standby.h:77
FullTransactionId definitely_needed
Definition: procarray.c:169
Definition: type.h:89
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1812
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:292
bool isBackgroundWorker
Definition: proc.h:151
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:73
#define xc_by_recent_xmin_inc()
Definition: procarray.c:300
#define xc_by_known_xact_inc()
Definition: procarray.c:301
static void GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
Definition: procarray.c:1947
bool MinimumActiveBackends(int min)
Definition: procarray.c:3318
static void KnownAssignedXidsRemovePreceding(TransactionId xid)
Definition: procarray.c:4740
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:3003
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3866
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:250
#define AssertTransactionIdInAllowableRange(xid)
Definition: transam.h:294
void pfree(void *pointer)
Definition: mcxt.c:1057
#define PROC_IN_VACUUM
Definition: proc.h:55
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
int CountDBConnections(Oid databaseid)
Definition: procarray.c:3401
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1870
#define ERROR
Definition: elog.h:43
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3753
#define FullTransactionIdIsNormal(x)
Definition: transam.h:58
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1635
bool delayChkpt
Definition: proc.h:176
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:838
#define lfirst_int(lc)
Definition: pg_list.h:170
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:392
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:307
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:514
#define FATAL
Definition: elog.h:52
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:11507
TransactionId slot_catalog_xmin
Definition: procarray.c:191
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3432
TransactionId xmin
Definition: proc.h:129
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define xc_by_main_xid_inc()
Definition: procarray.c:304
static bool GlobalVisTestShouldUpdate(GlobalVisState *state)
Definition: procarray.c:3915
static GlobalVisState GlobalVisSharedRels
Definition: procarray.c:263
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1380
static char * buf
Definition: pg_test_fsync.c:68
bool recoveryConflictPending
Definition: proc.h:158
#define xc_by_known_assigned_inc()
Definition: procarray.c:306
bool IsUnderPostmaster
Definition: globals.c:109
VariableCache ShmemVariableCache
Definition: varsup.c:34
int maxKnownAssignedXids
Definition: procarray.c:80
#define InvalidTransactionId
Definition: transam.h:31
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1369
TransactionId * xids
Definition: proc.h:309
static PGPROC * allProcs
Definition: procarray.c:242
Oid databaseId
Definition: proc.h:145
unsigned int uint32
Definition: c.h:374
TransactionId shared_oldest_nonremovable
Definition: procarray.c:213
TransactionId xmax
Definition: snapshot.h:158
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1386
TransactionId xmin
Definition: snapshot.h:157
static void KnownAssignedXidsReset(void)
Definition: procarray.c:4963
LOCK * waitLock
Definition: proc.h:170
int numKnownAssignedXids
Definition: procarray.c:81
static bool * KnownAssignedXidsValid
Definition: procarray.c:248
struct XidCache subxids
Definition: proc.h:200
TransactionId lastOverflowedXid
Definition: procarray.c:93
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
#define xc_by_latest_xid_inc()
Definition: procarray.c:303
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
#define INVALID_PGPROCNO
Definition: proc.h:76
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:168
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:4718
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:206
List * lappend_int(List *list, int datum)
Definition: list.c:339
Definition: proc.h:303
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
static ProcArrayStruct * procArray
Definition: procarray.c:240
#define WARNING
Definition: elog.h:40
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:70
signed char int8
Definition: c.h:360
static TransactionId ComputeXidHorizonsResultLastXmin
Definition: procarray.c:272
#define SpinLockRelease(lock)
Definition: spin.h:64
TransactionId replication_slot_xmin
Definition: procarray.c:96
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:3040
#define InvalidBackendId
Definition: backendid.h:23
static void MaintainLatestCompletedXid(TransactionId latestXid)
Definition: procarray.c:898
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:619
void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:1905
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:4818
bool GlobalVisIsRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4080
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
Oid MyDatabaseId
Definition: globals.c:85
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:4890
#define InvalidOid
Definition: postgres_ext.h:36
CommandId curcid
Definition: snapshot.h:187
#define ereport(elevel,...)
Definition: elog.h:144
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4094
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1927
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:101
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:48
#define TOTAL_MAX_CACHED_SUBXIDS
static TransactionId TransactionIdOlder(TransactionId a, TransactionId b)
Definition: transam.h:327
#define Assert(condition)
Definition: c.h:745
static TransactionId * KnownAssignedXids
Definition: procarray.c:247
BackendId backendId
Definition: lock.h:64
Definition: regguts.h:298
#define pg_read_barrier()
Definition: atomics.h:158
#define U64FromFullTransactionId(x)
Definition: transam.h:49
void CreateSharedProcArray(void)
Definition: procarray.c:383
#define FullTransactionIdFollowsOrEquals(a, b)
Definition: transam.h:54
bool takenDuringRecovery
Definition: snapshot.h:184
size_t Size
Definition: c.h:473
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1362
struct ComputeXidHorizonsResult ComputeXidHorizonsResult
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2064
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1049
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1208
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
Definition: procarray.c:3934
static TransactionId TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
Definition: transam.h:315
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:4832
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:4468
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1892
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2491
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:147
#define xc_no_overflow_inc()
Definition: procarray.c:307
bool EnableHotStandby
Definition: xlog.c:97
FullTransactionId maybe_needed
Definition: procarray.c:172
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
static void KnownAssignedXidsCompress(bool force)
Definition: procarray.c:4406
uint8 count
Definition: proc.h:41
int CountUserBackends(Oid roleid)
Definition: procarray.c:3472
TransactionId xid
Definition: proc.h:124
static FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
Definition: transam.h:353
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:393
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:4679
int pgprocno
Definition: proc.h:141
TransactionId nextXid
Definition: standby.h:75
bool TransactionIdIsActive(TransactionId xid)
Definition: procarray.c:1537
#define xc_slow_answer_inc()
Definition: procarray.c:308
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:334
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:954
uint32 xcnt
Definition: snapshot.h:169
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
struct ProcArrayStruct ProcArrayStruct
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:4586
FullTransactionId GlobalVisTestNonRemovableFullHorizon(GlobalVisState *state)
Definition: procarray.c:4055
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:4692
#define elog(elevel,...)
Definition: elog.h:214
#define InvalidLocalTransactionId
Definition: lock.h:68
TransactionId data_oldest_nonremovable
Definition: procarray.c:236
int i
int pgxactoff
Definition: proc.h:139
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4303
uint8 * vacuumFlags
Definition: proc.h:321
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2729
bool IsBackendPid(int pid)
Definition: procarray.c:3075
#define pg_write_barrier()
Definition: atomics.h:159
ProcSignalReason
Definition: procsignal.h:30
static bool GetSnapshotDataReuse(Snapshot snapshot)
Definition: procarray.c:1981
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3708
#define unlikely(x)
Definition: c.h:206
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:1938
uint8 vacuumFlags
Definition: proc.h:178
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2554
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:1221
TimestampTz whenTaken
Definition: snapshot.h:208
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:3600
PGPROC * allProcs
Definition: proc.h:306
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:3522
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:761
#define qsort(a, b, c, d)
Definition: port.h:475
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void GlobalVisUpdate(void)
Definition: procarray.c:3969
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1854
PGSemaphore sem
Definition: proc.h:118
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2934
static GlobalVisState GlobalVisCatalogRels
Definition: procarray.c:264
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4066
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4197
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:74
static GlobalVisState GlobalVisDataRels
Definition: procarray.c:265
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2416
int tailKnownAssignedXids
Definition: procarray.c:82
TransactionId slot_xmin
Definition: procarray.c:190
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:256
Definition: proc.h:112
Definition: pg_list.h:50
int pid
Definition: proc.h:137
HotStandbyState standbyState
Definition: xlog.c:208
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:434
#define PROC_IS_AUTOVACUUM
Definition: proc.h:54
#define offsetof(type, field)
Definition: c.h:668
TransactionId procArrayGroupMemberXid
Definition: proc.h:212
Size ProcArrayShmemSize(void)
Definition: procarray.c:341
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:773
TransactionId * subxip
Definition: snapshot.h:180
uint32 active_count
Definition: snapshot.h:204
int headKnownAssignedXids
Definition: procarray.c:83
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:139
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
int32 subxcnt
Definition: snapshot.h:181
LocalTransactionId lxid
Definition: proc.h:134