PostgreSQL Source Code  git master
procarray.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  * POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of PGPROC substructures, as well as associated
8  * arrays in ProcGlobal, for all active backends. Although there are several
9  * uses for this, the principal one is as a means of determining the set of
10  * currently running transactions.
11  *
12  * Because of various subtle race conditions it is critical that a backend
13  * hold the correct locks while setting or clearing its xid (in
14  * ProcGlobal->xids[]/MyProc->xid). See notes in
15  * src/backend/access/transam/README.
16  *
17  * The process arrays now also include structures representing prepared
18  * transactions. The xid and subxids fields of these are valid, as are the
19  * myProcLocks lists. They can be distinguished from regular backend PGPROCs
20  * at need by checking for pid == 0.
21  *
22  * During hot standby, we also keep a list of XIDs representing transactions
23  * that are known to be running on the primary (or more precisely, were running
24  * as of the current point in the WAL stream). This list is kept in the
25  * KnownAssignedXids array, and is updated by watching the sequence of
26  * arriving XIDs. This is necessary because if we leave those XIDs out of
27  * snapshots taken for standby queries, then they will appear to be already
28  * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
29  * array represents standby processes, which by definition are not running
30  * transactions that have XIDs.
31  *
32  * It is perhaps possible for a backend on the primary to terminate without
33  * writing an abort record for its transaction. While that shouldn't really
34  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
35  * ourselves by pruning the array when a valid list of running XIDs arrives.
36  *
37  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
38  * Portions Copyright (c) 1994, Regents of the University of California
39  *
40  *
41  * IDENTIFICATION
42  * src/backend/storage/ipc/procarray.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <signal.h>
49 
50 #include "access/clog.h"
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/twophase.h"
54 #include "access/xact.h"
55 #include "access/xlog.h"
56 #include "catalog/catalog.h"
57 #include "catalog/pg_authid.h"
58 #include "commands/dbcommands.h"
59 #include "miscadmin.h"
60 #include "pgstat.h"
61 #include "storage/proc.h"
62 #include "storage/procarray.h"
63 #include "storage/spin.h"
64 #include "utils/acl.h"
65 #include "utils/builtins.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 /* Read var via a volatile-qualified uint32 access and yield the value as a uint32; the volatile access keeps the compiler from eliding or repeating the load */
69 #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
70 
71 /* Our shared memory area */
72 typedef struct ProcArrayStruct
73 {
74  int numProcs; /* number of valid procs entries */
75  int maxProcs; /* allocated size of procs array */
76 
77  /*
78  * Known assigned XIDs handling
79  */
80  int maxKnownAssignedXids; /* allocated size of array */
81  int numKnownAssignedXids; /* current # of valid entries */
82  int tailKnownAssignedXids; /* index of oldest valid element */
83  int headKnownAssignedXids; /* index of newest element, + 1 */
84  slock_t known_assigned_xids_lck; /* protects head/tail pointers */
85 
86  /*
87  * Highest subxid that has been removed from KnownAssignedXids array to
88  * prevent overflow; or InvalidTransactionId if none. We track this for
89  * similar reasons to tracking overflowing cached subxids in PGPROC
90  * entries. Must hold exclusive ProcArrayLock to change this, and shared
91  * lock to read it.
92  */
94 
95  /* oldest xmin of any replication slot */
97  /* oldest catalog xmin of any replication slot */
99 
100  /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
103 
104 /*
105  * State for the GlobalVisTest* family of functions. Those functions can
106  * e.g. be used to decide if a deleted row can be removed without violating
107  * MVCC semantics: If the deleted row's xmax is not considered to be running
108  * by anyone, the row can be removed.
109  *
110  * To avoid slowing down GetSnapshotData(), we don't calculate a precise
111  * cutoff XID while building a snapshot (looking at the frequently changing
112  * xmins scales badly). Instead we compute two boundaries while building the
113  * snapshot:
114  *
115  * 1) definitely_needed, indicating that rows deleted by XIDs >=
116  * definitely_needed are definitely still visible.
117  *
118  * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
119  * definitely be removed
120  *
121  * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
122  * && XID < definitely_needed), the boundaries can be recomputed (using
123  * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
124  * maintaining an accurate value all the time.
125  *
126  * As it is not cheap to compute accurate boundaries, we limit the number of
127  * times that happens in short succession. See GlobalVisTestShouldUpdate().
128  *
129  *
130  * There are four backend lifetime instances of this struct, optimized for
131  * different types of relations. As e.g. a normal user defined table in one
132  * database is inaccessible to backends connected to another database, a test
133  * specific to a relation can be more aggressive than a test for a shared
134  * relation. Currently we track four different states:
135  *
136  * 1) GlobalVisSharedRels, which only considers an XID's
137  * effects visible-to-everyone if neither snapshots in any database, nor a
138  * replication slot's xmin, nor a replication slot's catalog_xmin might
139  * still consider XID as running.
140  *
141  * 2) GlobalVisCatalogRels, which only considers an XID's
142  * effects visible-to-everyone if neither snapshots in the current
143  * database, nor a replication slot's xmin, nor a replication slot's
144  * catalog_xmin might still consider XID as running.
145  *
146  * I.e. the difference to GlobalVisSharedRels is that
147  * snapshots in other databases are ignored.
148  *
149  * 3) GlobalVisDataRels, which only considers an XID's
150  * effects visible-to-everyone if neither snapshots in the current
151  * database, nor a replication slot's xmin consider XID as running.
152  *
153  * I.e. the difference to GlobalVisCatalogRels is that
154  * replication slot's catalog_xmin is not taken into account.
155  *
156  * 4) GlobalVisTempRels, which only considers the current session, as temp
157  * tables are not visible to other sessions.
158  *
159  * GlobalVisTestFor(relation) returns the appropriate state
160  * for the relation.
161  *
162  * The boundaries are FullTransactionIds instead of TransactionIds to avoid
163  * wraparound dangers. Otherwise there would, e.g., be no procarray state
164  * preventing maybe_needed from becoming old enough to wrap around after the
165  * GetSnapshotData() call.
166  *
167  * The typedef is in the header.
168  */
170 {
171  /* XIDs >= are considered running by some backend */
173 
174  /* XIDs < are not considered to be running by any backend */
176 };
177 
178 /*
179  * Result of ComputeXidHorizons().
180  */
182 {
183  /*
184  * The value of ShmemVariableCache->latestCompletedXid when
185  * ComputeXidHorizons() held ProcArrayLock.
186  */
188 
189  /*
190  * The same for procArray->replication_slot_xmin and
191  * procArray->replication_slot_catalog_xmin.
192  */
195 
196  /*
197  * Oldest xid that any backend might still consider running. This needs to
198  * include processes running VACUUM, in contrast to the normal visibility
199  * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
200  * determining visibility, but doesn't care about rows above its xmin
201  * being removed.
202  *
203  * This likely should only be needed to determine whether pg_subtrans can
204  * be truncated. It currently includes the effects of replication slots,
205  * for historical reasons. But that could likely be changed.
206  */
208 
209  /*
210  * Oldest xid for which deleted tuples need to be retained in shared
211  * tables.
212  *
213  * This includes the effects of replication slots. If that's not desired,
214  * look at shared_oldest_nonremovable_raw.
215  */
217 
218  /*
219  * Oldest xid that may be necessary to retain in shared tables. This is
220  * the same as shared_oldest_nonremovable, except that it is not affected by
221  * replication slot's catalog_xmin.
222  *
223  * This is mainly useful to be able to send the catalog_xmin to upstream
224  * streaming replication servers via hot_standby_feedback, so they can
225  * apply the limit only when accessing catalog tables.
226  */
228 
229  /*
230  * Oldest xid for which deleted tuples need to be retained in non-shared
231  * catalog tables.
232  */
234 
235  /*
236  * Oldest xid for which deleted tuples need to be retained in normal user
237  * defined tables.
238  */
240 
241  /*
242  * Oldest xid for which deleted tuples need to be retained in this
243  * session's temporary tables.
244  */
246 
248 
249 
251 /* Local pointer to ProcGlobal->allProcs (indexed by pgprocno), set when the shared ProcArray is created or attached */
252 static PGPROC *allProcs;
253 
254 /*
255  * Bookkeeping for tracking emulated transactions in recovery
256  */
260 
261 /*
262  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
263  * the highest xid that might still be running that we don't have in
264  * KnownAssignedXids.
265  */
267 
268 /*
269  * State for visibility checks on different types of relations. See struct
270  * GlobalVisState for details. As shared, catalog, normal and temporary
271  * relations can have different horizons, one such state exists for each.
272  */
277 
278 /*
279  * This backend's RecentXmin at the last time the accurate xmin horizon was
280  * recomputed, or InvalidTransactionId if it has not. Used to limit how many
281  * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
282  */
284 
285 #ifdef XIDCACHE_DEBUG
286 
287 /* Counters for XidCache measurement; each one is bumped only through its xc_*_inc() macro below */
288 static long xc_by_recent_xmin = 0;
289 static long xc_by_known_xact = 0;
290 static long xc_by_my_xact = 0;
291 static long xc_by_latest_xid = 0;
292 static long xc_by_main_xid = 0;
293 static long xc_by_child_xid = 0;
294 static long xc_by_known_assigned = 0;
295 static long xc_no_overflow = 0;
296 static long xc_slow_answer = 0;
297 /* Debug build: the increment helpers really count */
298 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
299 #define xc_by_known_xact_inc() (xc_by_known_xact++)
300 #define xc_by_my_xact_inc() (xc_by_my_xact++)
301 #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
302 #define xc_by_main_xid_inc() (xc_by_main_xid++)
303 #define xc_by_child_xid_inc() (xc_by_child_xid++)
304 #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
305 #define xc_no_overflow_inc() (xc_no_overflow++)
306 #define xc_slow_answer_inc() (xc_slow_answer++)
307 
308 static void DisplayXidCache(void);
309 #else /* !XIDCACHE_DEBUG */
310 /* Without XIDCACHE_DEBUG the helpers expand to no-op expressions, so call sites need no #ifdef of their own */
311 #define xc_by_recent_xmin_inc() ((void) 0)
312 #define xc_by_known_xact_inc() ((void) 0)
313 #define xc_by_my_xact_inc() ((void) 0)
314 #define xc_by_latest_xid_inc() ((void) 0)
315 #define xc_by_main_xid_inc() ((void) 0)
316 #define xc_by_child_xid_inc() ((void) 0)
317 #define xc_by_known_assigned_inc() ((void) 0)
318 #define xc_no_overflow_inc() ((void) 0)
319 #define xc_slow_answer_inc() ((void) 0)
320 #endif /* XIDCACHE_DEBUG */
321 
322 /* Primitives for KnownAssignedXids array handling for standby */
323 static void KnownAssignedXidsCompress(bool force);
324 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
325  bool exclusive_lock);
326 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
327 static bool KnownAssignedXidExists(TransactionId xid);
328 static void KnownAssignedXidsRemove(TransactionId xid);
329 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
330  TransactionId *subxids);
332 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
334  TransactionId *xmin,
335  TransactionId xmax);
337 static void KnownAssignedXidsDisplay(int trace_level);
338 static void KnownAssignedXidsReset(void);
339 static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
340 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
341 static void MaintainLatestCompletedXid(TransactionId latestXid);
343 
345  TransactionId xid);
346 static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
347 
348 /*
349  * Report shared-memory space needed by CreateSharedProcArray.
350  */
351 Size
353 {
354  Size size;
355 
356  /* Size of the ProcArray structure itself */
357 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
358 
360  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
361 
362  /*
363  * During Hot Standby processing we have a data structure called
364  * KnownAssignedXids, created in shared memory. Local data structures are
365  * also created in various backends during GetSnapshotData(),
366  * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
367  * main structures created in those functions must be identically sized,
368  * since we may at times copy the whole of the data structures around. We
369  * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
370  *
371  * Ideally we'd only create this structure if we were actually doing hot
372  * standby in the current run, but we don't know that yet at the time
373  * shared memory is being set up.
374  */
375 #define TOTAL_MAX_CACHED_SUBXIDS \
376  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
377 
378  if (EnableHotStandby)
379  {
380  size = add_size(size,
381  mul_size(sizeof(TransactionId),
383  size = add_size(size,
384  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
385  }
386 
387  return size;
388 }
389 
390 /*
391  * Initialize the shared PGPROC array during postmaster startup.
392  */
393 void
395 {
396  bool found;
397 
398  /* Create or attach to the ProcArray shared structure */
399  procArray = (ProcArrayStruct *)
400  ShmemInitStruct("Proc Array",
402  mul_size(sizeof(int),
404  &found);
405 
406  if (!found)
407  {
408  /*
409  * We're the first - initialize.
410  */
411  procArray->numProcs = 0;
412  procArray->maxProcs = PROCARRAY_MAXPROCS;
414  procArray->numKnownAssignedXids = 0;
415  procArray->tailKnownAssignedXids = 0;
416  procArray->headKnownAssignedXids = 0;
422  }
423 
424  allProcs = ProcGlobal->allProcs;
425 
426  /* Create or attach to the KnownAssignedXids arrays too, if needed */
427  if (EnableHotStandby)
428  {
430  ShmemInitStruct("KnownAssignedXids",
431  mul_size(sizeof(TransactionId),
433  &found);
434  KnownAssignedXidsValid = (bool *)
435  ShmemInitStruct("KnownAssignedXidsValid",
436  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
437  &found);
438  }
439 }
440 
441 /*
442  * Add the specified PGPROC to the shared array.
443  */
444 void
446 {
447  ProcArrayStruct *arrayP = procArray;
448  int index;
449 
450  /* See ProcGlobal comment explaining why both locks are held */
451  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
452  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
453 
454  if (arrayP->numProcs >= arrayP->maxProcs)
455  {
456  /*
457  * Oops, no room. (This really shouldn't happen, since there is a
458  * fixed supply of PGPROC structs too, and so we should have failed
459  * earlier.)
460  */
461  ereport(FATAL,
462  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
463  errmsg("sorry, too many clients already")));
464  }
465 
466  /*
467  * Keep the procs array sorted by pgprocno (the index into allProcs[]) so
468  * that we can utilize locality of references much better. This is useful
469  * while traversing the ProcArray because there is an increased likelihood
470  * of finding the next PGPROC structure in the cache.
471  *
472  * Since the occurrence of adding/removing a proc is much lower than the
473  * access to the ProcArray itself, the overhead should be marginal.
474  */
475  for (index = 0; index < arrayP->numProcs; index++)
476  {
477  /*
478  * Stop at the first slot that is unused (-1) or holds a larger pgprocno;
479  * that is our insertion point (numProcs, i.e. append, if none does).
480  */
481  if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
482  break;
483  }
484  /* Open a one-element gap at index in pgprocnos and in each parallel ProcGlobal array */
485  memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
486  (arrayP->numProcs - index) * sizeof(*arrayP->pgprocnos));
487  memmove(&ProcGlobal->xids[index + 1], &ProcGlobal->xids[index],
488  (arrayP->numProcs - index) * sizeof(*ProcGlobal->xids));
489  memmove(&ProcGlobal->subxidStates[index + 1], &ProcGlobal->subxidStates[index],
490  (arrayP->numProcs - index) * sizeof(*ProcGlobal->subxidStates));
491  memmove(&ProcGlobal->statusFlags[index + 1], &ProcGlobal->statusFlags[index],
492  (arrayP->numProcs - index) * sizeof(*ProcGlobal->statusFlags));
493  /* Fill the gap with the new PGPROC's values */
494  arrayP->pgprocnos[index] = proc->pgprocno;
495  ProcGlobal->xids[index] = proc->xid;
496  ProcGlobal->subxidStates[index] = proc->subxidStatus;
497  ProcGlobal->statusFlags[index] = proc->statusFlags;
498 
499  arrayP->numProcs++;
500  /* Refresh pgxactoff for the new entry and for every entry that was shifted up by one */
501  for (; index < arrayP->numProcs; index++)
502  {
503  allProcs[arrayP->pgprocnos[index]].pgxactoff = index;
504  }
505 
506  /*
507  * Release in reversed acquisition order, to reduce frequency of having to
508  * wait for XidGenLock while holding ProcArrayLock.
509  */
510  LWLockRelease(XidGenLock);
511  LWLockRelease(ProcArrayLock);
512 }
513 
514 /*
515  * Remove the specified PGPROC from the shared array.
516  *
517  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
518  * array, and thus causing it to appear as "not running" anymore. In this
519  * case we must advance latestCompletedXid. (This is essentially the same
520  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
521  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
522  * twophase.c depends on the latter.)
523  */
524 void
526 {
527  ProcArrayStruct *arrayP = procArray;
528  int index;
529 
530 #ifdef XIDCACHE_DEBUG
531  /* dump stats at backend shutdown, but not prepared-xact end */
532  if (proc->pid != 0)
533  DisplayXidCache();
534 #endif
535 
536  /* See ProcGlobal comment explaining why both locks are held */
537  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
538  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
539 
540  Assert(ProcGlobal->allProcs[arrayP->pgprocnos[proc->pgxactoff]].pgxactoff == proc->pgxactoff);
541 
542  if (TransactionIdIsValid(latestXid))
543  {
545 
546  /* Advance global latestCompletedXid while holding the lock */
547  MaintainLatestCompletedXid(latestXid);
548 
549  /* Same with xactCompletionCount */
551 
552  ProcGlobal->xids[proc->pgxactoff] = 0;
555  }
556  else
557  {
558  /* Shouldn't be trying to remove a live transaction here */
560  }
561 
565  ProcGlobal->statusFlags[proc->pgxactoff] = 0;
566 
567  for (index = 0; index < arrayP->numProcs; index++)
568  {
569  if (arrayP->pgprocnos[index] == proc->pgprocno)
570  {
571  /* Keep the PGPROC array sorted. See notes above */
572  memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
573  (arrayP->numProcs - index - 1) * sizeof(*arrayP->pgprocnos));
574  memmove(&ProcGlobal->xids[index], &ProcGlobal->xids[index + 1],
575  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->xids));
576  memmove(&ProcGlobal->subxidStates[index], &ProcGlobal->subxidStates[index + 1],
577  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->subxidStates));
578  memmove(&ProcGlobal->statusFlags[index], &ProcGlobal->statusFlags[index + 1],
579  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->statusFlags));
580 
581  arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
582  arrayP->numProcs--;
583 
584  /* adjust for removed PGPROC */
585  for (; index < arrayP->numProcs; index++)
586  allProcs[arrayP->pgprocnos[index]].pgxactoff--;
587 
588  /*
589  * Release in reversed acquisition order, to reduce frequency of
590  * having to wait for XidGenLock while holding ProcArrayLock.
591  */
592  LWLockRelease(XidGenLock);
593  LWLockRelease(ProcArrayLock);
594  return;
595  }
596  }
597 
598  /* Oops */
599  LWLockRelease(XidGenLock);
600  LWLockRelease(ProcArrayLock);
601 
602  elog(LOG, "failed to find proc %p in ProcArray", proc);
603 }
604 
605 
606 /*
607  * ProcArrayEndTransaction -- mark a transaction as no longer running
608  *
609  * This is used interchangeably for commit and abort cases. The transaction
610  * commit/abort must already be reported to WAL and pg_xact.
611  *
612  * proc is currently always MyProc, but we pass it explicitly for flexibility.
613  * latestXid is the latest Xid among the transaction's main XID and
614  * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
615  * the caller to pass latestXid, instead of computing it from the PGPROC's
616  * contents, because the subxid information in the PGPROC might be
617  * incomplete.)
618  */
619 void
621 {
622  if (TransactionIdIsValid(latestXid))
623  {
624  /*
625  * We must lock ProcArrayLock while clearing our advertised XID, so
626  * that we do not exit the set of "running" transactions while someone
627  * else is taking a snapshot. See discussion in
628  * src/backend/access/transam/README.
629  */
631 
632  /*
633  * If we can immediately acquire ProcArrayLock, we clear our own XID
634  * and release the lock. If not, use group XID clearing to improve
635  * efficiency.
636  */
637  if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
638  {
639  ProcArrayEndTransactionInternal(proc, latestXid);
640  LWLockRelease(ProcArrayLock);
641  }
642  else
643  ProcArrayGroupClearXid(proc, latestXid);
644  }
645  else
646  {
647  /*
648  * If we have no XID, we don't need to lock, since we won't affect
649  * anyone else's calculation of a snapshot. We might change their
650  * estimate of global xmin, but that's OK.
651  */
653  Assert(proc->subxidStatus.count == 0);
655 
657  proc->xmin = InvalidTransactionId;
658  proc->delayChkpt = false; /* be sure this is cleared in abort */
659  proc->recoveryConflictPending = false;
660 
661  /* must be cleared with xid/xmin: */
662  /* avoid unnecessarily dirtying shared cachelines */
664  {
665  Assert(!LWLockHeldByMe(ProcArrayLock));
666  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
670  LWLockRelease(ProcArrayLock);
671  }
672  }
673 }
674 
675 /*
676  * Mark a write transaction as no longer running.
677  *
678  * We don't do any locking here; caller must handle that.
679  */
680 static inline void
682 {
683  size_t pgxactoff = proc->pgxactoff;
684 
685  /*
686  * Note: we need exclusive lock here because we're going to change other
687  * processes' PGPROC entries.
688  */
689  Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
691  Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
692 
693  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
694  proc->xid = InvalidTransactionId;
696  proc->xmin = InvalidTransactionId;
697  proc->delayChkpt = false; /* be sure this is cleared in abort */
698  proc->recoveryConflictPending = false;
699 
700  /* must be cleared with xid/xmin: */
701  /* avoid unnecessarily dirtying shared cachelines */
703  {
706  }
707 
708  /* Clear the subtransaction-XID cache too while holding the lock */
709  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
711  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
712  {
713  ProcGlobal->subxidStates[pgxactoff].count = 0;
714  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
715  proc->subxidStatus.count = 0;
716  proc->subxidStatus.overflowed = false;
717  }
718 
719  /* Also advance global latestCompletedXid while holding the lock */
720  MaintainLatestCompletedXid(latestXid);
721 
722  /* Same with xactCompletionCount */
724 }
725 
726 /*
727  * ProcArrayGroupClearXid -- group XID clearing
728  *
729  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
730  * commit time, add ourselves to a list of processes that need their XIDs
731  * cleared. The first process to add itself to the list will acquire
732  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
733  * on behalf of all group members. This avoids a great deal of contention
734  * around ProcArrayLock when many processes are trying to commit at once,
735  * since the lock need not be repeatedly handed off from one committing
736  * process to the next.
737  */
738 static void
740 {
741  PROC_HDR *procglobal = ProcGlobal;
742  uint32 nextidx;
743  uint32 wakeidx;
744 
745  /* We should definitely have an XID to clear. */
747 
748  /* Add ourselves to the list of processes needing a group XID clear. */
749  proc->procArrayGroupMember = true;
750  proc->procArrayGroupMemberXid = latestXid;
751  nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
752  while (true)
753  {
754  pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
755 
757  &nextidx,
758  (uint32) proc->pgprocno))
759  break;
760  }
761 
762  /*
763  * If the list was not empty, the leader will clear our XID. It is
764  * impossible to have followers without a leader because the first process
765  * that has added itself to the list will always have nextidx as
766  * INVALID_PGPROCNO.
767  */
768  if (nextidx != INVALID_PGPROCNO)
769  {
770  int extraWaits = 0;
771 
772  /* Sleep until the leader clears our XID. */
774  for (;;)
775  {
776  /* acts as a read barrier */
777  PGSemaphoreLock(proc->sem);
778  if (!proc->procArrayGroupMember)
779  break;
780  extraWaits++;
781  }
783 
785 
786  /* Fix semaphore count for any absorbed wakeups */
787  while (extraWaits-- > 0)
788  PGSemaphoreUnlock(proc->sem);
789  return;
790  }
791 
792  /* We are the leader. Acquire the lock on behalf of everyone. */
793  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
794 
795  /*
796  * Now that we've got the lock, clear the list of processes waiting for
797  * group XID clearing, saving a pointer to the head of the list. Trying
798  * to pop elements one at a time could lead to an ABA problem.
799  */
800  nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
802 
803  /* Remember head of list so we can perform wakeups after dropping lock. */
804  wakeidx = nextidx;
805 
806  /* Walk the list and clear all XIDs. */
807  while (nextidx != INVALID_PGPROCNO)
808  {
809  PGPROC *proc = &allProcs[nextidx];
810 
812 
813  /* Move to next proc in list. */
814  nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
815  }
816 
817  /* We're done with the lock now. */
818  LWLockRelease(ProcArrayLock);
819 
820  /*
821  * Now that we've released the lock, go back and wake everybody up. We
822  * don't do this under the lock so as to keep lock hold times to a
823  * minimum. The system calls we need to perform to wake other processes
824  * up are probably much slower than the simple memory writes we did while
825  * holding the lock.
826  */
827  while (wakeidx != INVALID_PGPROCNO)
828  {
829  PGPROC *proc = &allProcs[wakeidx];
830 
831  wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
833 
834  /* ensure all previous writes are visible before follower continues. */
836 
837  proc->procArrayGroupMember = false;
838 
839  if (proc != MyProc)
840  PGSemaphoreUnlock(proc->sem);
841  }
842 }
843 
844 /*
845  * ProcArrayClearTransaction -- clear the transaction fields
846  *
847  * This is used after successfully preparing a 2-phase transaction. We are
848  * not actually reporting the transaction's XID as no longer running --- it
849  * will still appear as running because the 2PC's gxact is in the ProcArray
850  * too. We just have to clear out our own PGPROC.
851  */
852 void
854 {
855  size_t pgxactoff;
856 
857  /*
858  * Currently we need to lock ProcArrayLock exclusively here, as we
859  * increment xactCompletionCount below. We also need it at least in shared
860  * mode for pgproc->pgxactoff to stay the same below.
861  *
862  * We could however, as this action does not actually change anyone's view
863  * of the set of running XIDs (our entry is duplicate with the gxact that
864  * has already been inserted into the ProcArray), lower the lock level to
865  * shared if we were to make xactCompletionCount an atomic variable. But
866  * that doesn't seem worth it currently, as a 2PC commit is heavyweight
867  * enough for this not to be the bottleneck. If it ever becomes a
868  * bottleneck it may also be worth considering to combine this with the
869  * subsequent ProcArrayRemove().
870  */
871  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
872 
873  pgxactoff = proc->pgxactoff;
874 
875  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
876  proc->xid = InvalidTransactionId;
877 
879  proc->xmin = InvalidTransactionId;
880  proc->recoveryConflictPending = false;
881 
883  Assert(!proc->delayChkpt);
884 
885  /*
886  * Need to increment completion count even though transaction hasn't
887  * really committed yet. The reason for that is that GetSnapshotData()
888  * omits the xid of the current transaction, thus without the increment we
889  * otherwise could end up reusing the snapshot later. Which would be bad,
890  * because it might not count the prepared transaction as running.
891  */
893 
894  /* Clear the subtransaction-XID cache too */
895  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
897  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
898  {
899  ProcGlobal->subxidStates[pgxactoff].count = 0;
900  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
901  proc->subxidStatus.count = 0;
902  proc->subxidStatus.overflowed = false;
903  }
904 
905  LWLockRelease(ProcArrayLock);
906 }
907 
908 /*
909  * Update ShmemVariableCache->latestCompletedXid to point to latestXid if
910  * currently older.
911  */
912 static void
914 {
916 
917  Assert(FullTransactionIdIsValid(cur_latest));
919  Assert(LWLockHeldByMe(ProcArrayLock));
920 
921  if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
922  {
924  FullXidRelativeTo(cur_latest, latestXid);
925  }
926 
929 }
930 
931 /*
932  * Same as MaintainLatestCompletedXid, except for use during WAL replay.
933  */
934 static void
936 {
938  FullTransactionId rel;
939 
941  Assert(LWLockHeldByMe(ProcArrayLock));
942 
943  /*
944  * Need a FullTransactionId to compare latestXid with. Can't rely on
945  * latestCompletedXid to be initialized in recovery. But in recovery it's
946  * safe to access nextXid without a lock for the startup process.
947  */
950 
951  if (!FullTransactionIdIsValid(cur_latest) ||
952  TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
953  {
955  FullXidRelativeTo(rel, latestXid);
956  }
957 
959 }
960 
961 /*
962  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
963  *
964  * Remember up to where the startup process initialized the CLOG and subtrans
965  * so we can ensure it's initialized gaplessly up to the point where necessary
966  * while in recovery.
967  */
968 void
970 {
972  Assert(TransactionIdIsNormal(initializedUptoXID));
973 
974  /*
975  * we set latestObservedXid to the xid SUBTRANS has been initialized up
976  * to, so we can extend it from that point onwards in
977  * RecordKnownAssignedTransactionIds, and when we get consistent in
978  * ProcArrayApplyRecoveryInfo().
979  */
980  latestObservedXid = initializedUptoXID;
982 }
983 
984 /*
985  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
986  *
987  * Takes us through 3 states: Initialized, Pending and Ready.
988  * Normal case is to go all the way to Ready straight away, though there
989  * are atypical cases where we need to take it in steps.
990  *
991  * Use the data about running transactions on the primary to create the initial
992  * state of KnownAssignedXids. We also use these records to regularly prune
993  * KnownAssignedXids because we know it is possible that some transactions
994  * with FATAL errors fail to write abort records, which could cause eventual
995  * overflow.
996  *
997  * See comments for LogStandbySnapshot().
998  */
999 void
1001 {
1002  TransactionId *xids;
1003  int nxids;
1004  int i;
1005 
1007  Assert(TransactionIdIsValid(running->nextXid));
1010 
1011  /*
1012  * Remove stale transactions, if any.
1013  */
1015 
1016  /*
1017  * Remove stale locks, if any.
1018  */
1020 
1021  /*
1022  * If our snapshot is already valid, nothing else to do...
1023  */
1025  return;
1026 
1027  /*
1028  * If our initial RunningTransactionsData had an overflowed snapshot then
1029  * we knew we were missing some subxids from our snapshot. If we continue
1030  * to see overflowed snapshots then we might never be able to start up, so
1031  * we make another test to see if our snapshot is now valid. We know that
1032  * the missing subxids are equal to or earlier than nextXid. After we
1033  * initialise we continue to apply changes during recovery, so once the
1034  * oldestRunningXid is later than the nextXid from the initial snapshot we
1035  * know that we no longer have missing information and can mark the
1036  * snapshot as valid.
1037  */
1039  {
1040  /*
1041  * If the snapshot isn't overflowed or if it is empty we can reset our
1042  * pending state and use this snapshot instead.
1043  */
1044  if (!running->subxid_overflow || running->xcnt == 0)
1045  {
1046  /*
1047  * If we have already collected known assigned xids, we need to
1048  * throw them away before we apply the recovery snapshot.
1049  */
1052  }
1053  else
1054  {
1056  running->oldestRunningXid))
1057  {
1060  "recovery snapshots are now enabled");
1061  }
1062  else
1064  "recovery snapshot waiting for non-overflowed snapshot or "
1065  "until oldest active xid on standby is at least %u (now %u)",
1067  running->oldestRunningXid);
1068  return;
1069  }
1070  }
1071 
1073 
1074  /*
1075  * NB: this can be reached at least twice, so make sure new code can deal
1076  * with that.
1077  */
1078 
1079  /*
1080  * Nobody else is running yet, but take locks anyhow
1081  */
1082  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1083 
1084  /*
1085  * KnownAssignedXids is sorted so we cannot just add the xids, we have to
1086  * sort them first.
1087  *
1088  * Some of the new xids are top-level xids and some are subtransactions.
1089  * We don't call SubTransSetParent because it doesn't matter yet. If we
1090  * aren't overflowed then all xids will fit in snapshot and so we don't
1091  * need subtrans. If we later overflow, an xid assignment record will add
1092  * xids to subtrans. If RunningTransactionsData is overflowed then we
1093  * don't have enough information to correctly update subtrans anyway.
1094  */
1095 
1096  /*
1097  * Allocate a temporary array to avoid modifying the array passed as
1098  * argument.
1099  */
1100  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
1101 
1102  /*
1103  * Add to the temp array any xids which have not already completed.
1104  */
1105  nxids = 0;
1106  for (i = 0; i < running->xcnt + running->subxcnt; i++)
1107  {
1108  TransactionId xid = running->xids[i];
1109 
1110  /*
1111  * The running-xacts snapshot can contain xids that were still visible
1112  * in the procarray when the snapshot was taken, but were already
1113  * WAL-logged as completed. They're not running anymore, so ignore
1114  * them.
1115  */
1117  continue;
1118 
1119  xids[nxids++] = xid;
1120  }
1121 
1122  if (nxids > 0)
1123  {
1124  if (procArray->numKnownAssignedXids != 0)
1125  {
1126  LWLockRelease(ProcArrayLock);
1127  elog(ERROR, "KnownAssignedXids is not empty");
1128  }
1129 
1130  /*
1131  * Sort the array so that we can add them safely into
1132  * KnownAssignedXids.
1133  */
1134  qsort(xids, nxids, sizeof(TransactionId), xidComparator);
1135 
1136  /*
1137  * Add the sorted snapshot into KnownAssignedXids. The running-xacts
1138  * snapshot may include duplicated xids because of prepared
1139  * transactions, so ignore them.
1140  */
1141  for (i = 0; i < nxids; i++)
1142  {
1143  if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
1144  {
1145  elog(DEBUG1,
1146  "found duplicated transaction %u for KnownAssignedXids insertion",
1147  xids[i]);
1148  continue;
1149  }
1150  KnownAssignedXidsAdd(xids[i], xids[i], true);
1151  }
1152 
1154  }
1155 
1156  pfree(xids);
1157 
1158  /*
1159  * latestObservedXid is at least set to the point where SUBTRANS was
1160  * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
1161  * RecordKnownAssignedTransactionIds() was called for. Initialize
1162  * subtrans from thereon, up to nextXid - 1.
1163  *
1164  * We need to duplicate parts of RecordKnownAssignedTransactionIds() here,
1165  * because we've just added xids to the known assigned xids machinery that
1166  * haven't gone through RecordKnownAssignedTransactionIds().
1167  */
1171  {
1174  }
1175  TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
1176 
1177  /* ----------
1178  * Now we've got the running xids we need to set the global values that
1179  * are used to track snapshots as they evolve further.
1180  *
1181  * - latestCompletedXid which will be the xmax for snapshots
1182  * - lastOverflowedXid which shows whether snapshots overflow
1183  * - nextXid
1184  *
1185  * If the snapshot overflowed, then we still initialise with what we know,
1186  * but the recovery snapshot isn't fully valid yet because we know there
1187  * are some subxids missing. We don't know the specific subxids that are
1188  * missing, so conservatively assume the last one is latestObservedXid.
1189  * ----------
1190  */
1191  if (running->subxid_overflow)
1192  {
1194 
1196  procArray->lastOverflowedXid = latestObservedXid;
1197  }
1198  else
1199  {
1201 
1203  }
1204 
1205  /*
1206  * If a transaction wrote a commit record in the gap between taking and
1207  * logging the snapshot then latestCompletedXid may already be higher than
1208  * the value from the snapshot, so check before we use the incoming value.
1209  * It also might not yet be set at all.
1210  */
1212 
1213  /*
1214  * NB: No need to increment ShmemVariableCache->xactCompletionCount here,
1215  * nobody can see it yet.
1216  */
1217 
1218  LWLockRelease(ProcArrayLock);
1219 
1220  /* ShmemVariableCache->nextXid must be beyond any observed xid. */
1222 
1224 
1227  elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
1228  else
1230  "recovery snapshot waiting for non-overflowed snapshot or "
1231  "until oldest active xid on standby is at least %u (now %u)",
1233  running->oldestRunningXid);
1234 }
1235 
1236 /*
1237  * ProcArrayApplyXidAssignment
1238  * Process an XLOG_XACT_ASSIGNMENT WAL record
1239  */
1240 void
1242  int nsubxids, TransactionId *subxids)
1243 {
1244  TransactionId max_xid;
1245  int i;
1246 
1248 
1249  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
1250 
1251  /*
1252  * Mark all the subtransactions as observed.
1253  *
1254  * NOTE: This will fail if the subxid array contains too many previously
1255  * unobserved xids to fit into known-assigned-xids. That shouldn't happen
1256  * as the code stands, because xid-assignment records should never contain
1257  * more than PGPROC_MAX_CACHED_SUBXIDS entries.
1258  */
1260 
1261  /*
1262  * Notice that we update pg_subtrans with the top-level xid, rather than
1263  * the parent xid. This is a difference between normal processing and
1264  * recovery, yet is still correct in all cases. The reason is that
1265  * subtransaction commit is not marked in clog until commit processing, so
1266  * all aborted subtransactions have already been clearly marked in clog.
1267  * As a result we are able to refer directly to the top-level
1268  * transaction's state rather than skipping through all the intermediate
1269  * states in the subtransaction tree. This should be the first time we
1270  * have attempted to SubTransSetParent().
1271  */
1272  for (i = 0; i < nsubxids; i++)
1273  SubTransSetParent(subxids[i], topxid);
1274 
1275  /* KnownAssignedXids isn't maintained yet, so we're done for now */
1277  return;
1278 
1279  /*
1280  * Uses same locking as transaction commit
1281  */
1282  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1283 
1284  /*
1285  * Remove subxids from known-assigned-xids.
1286  */
1288 
1289  /*
1290  * Advance lastOverflowedXid to be at least the last of these subxids.
1291  */
1292  if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
1293  procArray->lastOverflowedXid = max_xid;
1294 
1295  LWLockRelease(ProcArrayLock);
1296 }
1297 
1298 /*
1299  * TransactionIdIsInProgress -- is given transaction running in some backend
1300  *
1301  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1302  * there are four possibilities for finding a running transaction:
1303  *
1304  * 1. The given Xid is a main transaction Id. We will find this out cheaply
1305  * by looking at ProcGlobal->xids.
1306  *
1307  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1308  * We can find this out cheaply too.
1309  *
1310  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1311  * if the Xid is running on the primary.
1312  *
1313  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1314  * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
1315  * This is the slowest way, but sadly it has to be done always if the others
1316  * failed, unless we see that the cached subxact sets are complete (none have
1317  * overflowed).
1318  *
1319  * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
1320  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1321  * This buys back some concurrency (and we can't retrieve the main Xids from
1322  * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
1323  */
1324 bool
1326 {
1327  static TransactionId *xids = NULL;
1328  static TransactionId *other_xids; /* NOTE(review): reassigned on every call below; "static" looks unneeded */
1329  XidCacheStatus *other_subxidstates;
1330  int nxids = 0;
1331  ProcArrayStruct *arrayP = procArray;
1332  TransactionId topxid;
1333  TransactionId latestCompletedXid;
1334  int mypgxactoff;
1335  size_t numProcs;
1336  int j;
1337 
1338  /*
1339  * Don't bother checking a transaction older than RecentXmin; it could not
1340  * possibly still be running. (Note: in particular, this guarantees that
1341  * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1342  * running.)
1343  */
1345  {
1347  return false;
1348  }
1349 
1350  /*
1351  * We may have just checked the status of this transaction, so if it is
1352  * already known to be completed, we can fall out without any access to
1353  * shared memory.
1354  */
1356  {
1358  return false;
1359  }
1360 
1361  /*
1362  * Also, we can handle our own transaction (and subtransactions) without
1363  * any access to shared memory.
1364  */
1366  {
1368  return true;
1369  }
1370 
1371  /*
1372  * If first time through, get workspace to remember main XIDs in. We
1373  * malloc it permanently to avoid repeated palloc/pfree overhead.
1374  */
1375  if (xids == NULL)
1376  {
1377  /*
1378  * In hot standby mode, reserve enough space to hold all xids in the
1379  * known-assigned list. If we later finish recovery, we no longer need
1380  * the bigger array, but we don't bother to shrink it.
1381  */
1382  int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1383 
1384  xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1385  if (xids == NULL)
1386  ereport(ERROR,
1387  (errcode(ERRCODE_OUT_OF_MEMORY),
1388  errmsg("out of memory")));
1389  }
1390 
1391  other_xids = ProcGlobal->xids;
1392  other_subxidstates = ProcGlobal->subxidStates;
1393 
1394  LWLockAcquire(ProcArrayLock, LW_SHARED);
1395 
1396  /*
1397  * Now that we have the lock, we can check latestCompletedXid; if the
1398  * target Xid is after that, it's surely still running.
1399  */
1400  latestCompletedXid =
1402  if (TransactionIdPrecedes(latestCompletedXid, xid))
1403  {
1404  LWLockRelease(ProcArrayLock);
1406  return true;
1407  }
1408 
1409  /* No shortcut applies, so we have to scan the whole array */
1410  mypgxactoff = MyProc->pgxactoff;
1411  numProcs = arrayP->numProcs;
1412  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
1413  {
1414  int pgprocno;
1415  PGPROC *proc;
1416  TransactionId pxid;
1417  int pxids;
1418 
1419  /* Ignore ourselves --- dealt with it above */
1420  if (pgxactoff == mypgxactoff)
1421  continue;
1422 
1423  /* Fetch xid just once - see GetNewTransactionId */
1424  pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
1425 
1426  if (!TransactionIdIsValid(pxid))
1427  continue;
1428 
1429  /*
1430  * Step 1: check the main Xid
1431  */
1432  if (TransactionIdEquals(pxid, xid))
1433  {
1434  LWLockRelease(ProcArrayLock);
1436  return true;
1437  }
1438 
1439  /*
1440  * We can ignore main Xids that are younger than the target Xid, since
1441  * the target could not possibly be their child.
1442  */
1443  if (TransactionIdPrecedes(xid, pxid))
1444  continue;
1445 
1446  /*
1447  * Step 2: check the cached child-Xids arrays
1448  */
1449  pxids = other_subxidstates[pgxactoff].count;
1450  pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
1451  pgprocno = arrayP->pgprocnos[pgxactoff];
1452  proc = &allProcs[pgprocno];
1453  for (j = pxids - 1; j >= 0; j--)
1454  {
1455  /* Fetch xid just once - see GetNewTransactionId */
1456  TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
1457 
1458  if (TransactionIdEquals(cxid, xid))
1459  {
1460  LWLockRelease(ProcArrayLock);
1462  return true;
1463  }
1464  }
1465 
1466  /*
1467  * Save the main Xid for step 4. We only need to remember main Xids
1468  * that have uncached children. (Note: there is no race condition
1469  * here because the overflowed flag cannot be cleared, only set, while
1470  * we hold ProcArrayLock. So we can't miss an Xid that we need to
1471  * worry about.)
1472  */
1473  if (other_subxidstates[pgxactoff].overflowed)
1474  xids[nxids++] = pxid;
1475  }
1476 
1477  /*
1478  * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1479  * in the list must be treated as running.
1480  */
1481  if (RecoveryInProgress())
1482  {
1483  /* none of the PGPROC entries should have XIDs in hot standby mode */
1484  Assert(nxids == 0);
1485 
1486  if (KnownAssignedXidExists(xid))
1487  {
1488  LWLockRelease(ProcArrayLock);
1490  return true;
1491  }
1492 
1493  /*
1494  * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1495  * too. Fetch all xids from KnownAssignedXids that are lower than
1496  * xid, since if xid is a subtransaction its parent will always have a
1497  * lower value. Note we will collect both main and subXIDs here, but
1498  * there's no help for it.
1499  */
1500  if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1501  nxids = KnownAssignedXidsGet(xids, xid);
1502  }
1503 
1504  LWLockRelease(ProcArrayLock);
1505 
1506  /*
1507  * If none of the relevant caches overflowed, we know the Xid is not
1508  * running without even looking at pg_subtrans.
1509  */
1510  if (nxids == 0)
1511  {
1513  return false;
1514  }
1515 
1516  /*
1517  * Step 4: have to check pg_subtrans.
1518  *
1519  * At this point, we know it's either a subtransaction of one of the Xids
1520  * in xids[], or it's not running. If it's an already-failed
1521  * subtransaction, we want to say "not running" even though its parent may
1522  * still be running. So first, check pg_xact to see if it's been aborted.
1523  */
1525 
1526  if (TransactionIdDidAbort(xid))
1527  return false;
1528 
1529  /*
1530  * It isn't aborted, so check whether the transaction tree it belongs to
1531  * is still running (or, more precisely, whether it was running when we
1532  * held ProcArrayLock).
1533  */
1534  topxid = SubTransGetTopmostTransaction(xid);
1535  Assert(TransactionIdIsValid(topxid));
1536  if (!TransactionIdEquals(topxid, xid))
1537  {
1538  for (int i = 0; i < nxids; i++)
1539  {
1540  if (TransactionIdEquals(xids[i], topxid))
1541  return true;
1542  }
1543  }
1544 
1545  return false;
1546 }
1547 
1548 /*
1549  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1550  *
1551  * This differs from TransactionIdIsInProgress in that it ignores prepared
1552  * transactions, as well as transactions running on the primary if we're in
1553  * hot standby. Also, we ignore subtransactions since that's not needed
1554  * for current uses.
1555  */
1556 bool
1558 {
1559  bool result = false;
1560  ProcArrayStruct *arrayP = procArray;
1561  TransactionId *other_xids = ProcGlobal->xids;
1562  int i;
1563 
1564  /*
1565  * Don't bother checking a transaction older than RecentXmin; it could not
1566  * possibly still be running.
1567  */
1569  return false;
1570 
1571  LWLockAcquire(ProcArrayLock, LW_SHARED);
1572 
1573  for (i = 0; i < arrayP->numProcs; i++)
1574  {
1575  int pgprocno = arrayP->pgprocnos[i];
1576  PGPROC *proc = &allProcs[pgprocno];
1577  TransactionId pxid;
1578 
1579  /* Fetch xid just once - see GetNewTransactionId */
1580  pxid = UINT32_ACCESS_ONCE(other_xids[i]);
1581 
1582  if (!TransactionIdIsValid(pxid))
1583  continue;
1584 
1585  if (proc->pid == 0)
1586  continue; /* prepared transactions have pid == 0; ignore them */
1587 
1588  if (TransactionIdEquals(pxid, xid))
1589  {
1590  result = true;
1591  break;
1592  }
1593  }
1594 
1595  LWLockRelease(ProcArrayLock);
1596 
1597  return result;
1598 }
1599 
1600 
1601 /*
1602  * Determine XID horizons.
1603  *
1604  * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
1605  * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
1606  * well as "internally" by GlobalVisUpdate() (see comment above struct
1607  * GlobalVisState).
1608  *
1609  * See the definition of ComputeXidHorizonsResult for the various computed
1610  * horizons.
1611  *
1612  * For VACUUM, separate horizons (used to decide which deleted tuples must be
1613  * preserved) are computed for shared and non-shared tables. For shared
1614  * relations backends in all databases must be considered, but for non-shared
1615  * relations that's not required, since only backends in my own database could
1616  * ever see the tuples in them. Also, we can ignore concurrently running lazy
1617  * VACUUMs because (a) they must be working on other tables, and (b) they
1618  * don't need to do snapshot-based lookups. Similarly, for the non-catalog
1619  * horizon, we can ignore CREATE INDEX CONCURRENTLY and REINDEX CONCURRENTLY
1620  * when they are working on non-partial, non-expressional indexes, for the
1621  * same reasons and because they can't run in transaction blocks. (They are
1622  * not possible to ignore for catalogs, because CIC and RC do some catalog
1623  * operations.) Do note that this means that CIC and RC must use a lock level
1624  * that conflicts with VACUUM.
1625  *
1626  * This also computes a horizon used to truncate pg_subtrans. For that
1627  * backends in all databases have to be considered, and concurrently running
1628  * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
1629  * accesses.
1630  *
1631  * Note: we include all currently running xids in the set of considered xids.
1632  * This ensures that if a just-started xact has not yet set its snapshot,
1633  * when it does set the snapshot it cannot set xmin less than what we compute.
1634  * See notes in src/backend/access/transam/README.
1635  *
1636  * Note: despite the above, it's possible for the calculated values to move
1637  * backwards on repeated calls. The calculated values are conservative, so
1638  * that anything older is definitely not considered as running by anyone
1639  * anymore, but the exact values calculated depend on a number of things. For
1640  * example, if there are no transactions running in the current database, the
1641  * horizon for normal tables will be latestCompletedXid. If a transaction
1642  * begins after that, its xmin will include in-progress transactions in other
1643  * databases that started earlier, so another call will return a lower value.
1644  * Nonetheless it is safe to vacuum a table in the current database with the
1645  * first result. There are also replication-related effects: a walsender
1646  * process can set its xmin based on transactions that are no longer running
1647  * on the primary but are still being replayed on the standby, thus possibly
1648  * making the values go backwards. In this case there is a possibility that
1649  * we lose data that the standby would like to have, but unless the standby
1650  * uses a replication slot to make its xmin persistent there is little we can
1651  * do about that --- data is only protected if the walsender runs continuously
1652  * while queries are executed on the standby. (The Hot Standby code deals
1653  * with such cases by failing standby queries that needed to access
1654  * already-removed data, so there's no integrity bug.) The computed values
1655  * are also adjusted with vacuum_defer_cleanup_age, so increasing that setting
1656  * on the fly is another easy way to make horizons move backwards, with no
1657  * consequences for data integrity.
1658  *
1659  * Note: the approximate horizons (see definition of GlobalVisState) are
1660  * updated by the computations done here. That's currently required for
1661  * correctness and a small optimization. Without doing so it's possible that
1662  * heap vacuum's call to heap_page_prune() uses a more conservative horizon
1663  * than later when deciding which tuples can be removed - which the code
1664  * doesn't expect (breaking HOT).
1665  */
1666 static void
1668 {
1669  ProcArrayStruct *arrayP = procArray;
1670  TransactionId kaxmin;
1671  bool in_recovery = RecoveryInProgress();
1672  TransactionId *other_xids = ProcGlobal->xids;
1673 
1674  LWLockAcquire(ProcArrayLock, LW_SHARED);
1675 
1677 
1678  /*
1679  * We initialize the MIN() calculation with latestCompletedXid + 1. This
1680  * is a lower bound for the XIDs that might appear in the ProcArray later,
1681  * and so protects us against overestimating the result due to future
1682  * additions.
1683  */
1684  {
1685  TransactionId initial;
1686 
1688  Assert(TransactionIdIsValid(initial));
1689  TransactionIdAdvance(initial);
1690 
1691  h->oldest_considered_running = initial;
1692  h->shared_oldest_nonremovable = initial;
1693  h->catalog_oldest_nonremovable = initial;
1694  h->data_oldest_nonremovable = initial;
1695 
1696  /*
1697  * Only modifications made by this backend affect the horizon for
1698  * temporary relations. Instead of a check in each iteration of the
1699  * loop over all PGPROCs it is cheaper to just initialize to the
1700  * current top-level xid, if any.
1701  *
1702  * Without an assigned xid we could use a horizon as aggressive as
1703  * ReadNewTransactionid(), but we can get away with the much cheaper
1704  * latestCompletedXid + 1: if this backend has no xid, there can't, by
1705  * definition, be any newer changes in the temp table than
1706  * latestCompletedXid.
1707  */
1710  else
1711  h->temp_oldest_nonremovable = initial;
1712  }
1713 
1714  /*
1715  * Fetch slot horizons while ProcArrayLock is held - the
1716  * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
1717  * the lock.
1718  */
1719  h->slot_xmin = procArray->replication_slot_xmin;
1721 
1722  for (int index = 0; index < arrayP->numProcs; index++)
1723  {
1724  int pgprocno = arrayP->pgprocnos[index];
1725  PGPROC *proc = &allProcs[pgprocno];
1726  int8 statusFlags = ProcGlobal->statusFlags[index];
1727  TransactionId xid;
1728  TransactionId xmin;
1729 
1730  /* Fetch xid just once - see GetNewTransactionId */
1731  xid = UINT32_ACCESS_ONCE(other_xids[index]);
1732  xmin = UINT32_ACCESS_ONCE(proc->xmin);
1733 
1734  /*
1735  * Consider both the transaction's Xmin, and its Xid.
1736  *
1737  * We must check both because a transaction might have an Xmin but not
1738  * (yet) an Xid; conversely, if it has an Xid, that could determine
1739  * some not-yet-set Xmin.
1740  */
1741  xmin = TransactionIdOlder(xmin, xid);
1742 
1743  /* if neither is set, this proc doesn't influence the horizon */
1744  if (!TransactionIdIsValid(xmin))
1745  continue;
1746 
1747  /*
1748  * Don't ignore any procs when determining which transactions might be
1749  * considered running. While slots should ensure logical decoding
1750  * backends are protected even without this check, it can't hurt to
1751  * include them here as well.
1752  */
1755 
1756  /*
1757  * Skip over backends either vacuuming (which is ok with rows being
1758  * removed, as long as pg_subtrans is not truncated) or doing logical
1759  * decoding (which manages xmin separately, check below).
1760  */
1761  if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
1762  continue;
1763 
1764  /* shared tables need to take backends in all databases into account */
1767 
1768  /*
1769  * Normally queries in other databases are ignored for anything but
1770  * the shared horizon. But in recovery we cannot compute an accurate
1771  * per-database horizon as all xids are managed via the
1772  * KnownAssignedXids machinery.
1773  *
1774  * Be careful to compute a pessimistic value when MyDatabaseId is not
1775  * set. If this is a backend in the process of starting up, we may not
1776  * use a "too aggressive" horizon (otherwise we could end up using it
1777  * to prune still needed data away). If the current backend never
1778  * connects to a database that is harmless, because
1779  * data_oldest_nonremovable will never be utilized.
1780  */
1781  if (in_recovery ||
1783  proc->databaseId == 0) /* always include WalSender */
1784  {
1785  /*
1786  * We can ignore this backend if it's running CREATE INDEX
1787  * CONCURRENTLY or REINDEX CONCURRENTLY on a "safe" index -- but
1788  * only on vacuums of user-defined tables.
1789  */
1790  if (!(statusFlags & PROC_IN_SAFE_IC))
1793 
1794  /* Catalog tables need to consider all backends in this db */
1797 
1798  }
1799  }
1800 
1801  /* catalog horizon should never be later than data */
1804 
1805  /*
1806  * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
1807  * after lock is released.
1808  */
1809  if (in_recovery)
1810  kaxmin = KnownAssignedXidsGetOldestXmin();
1811 
1812  /*
1813  * No other information from shared state is needed, release the lock
1814  * immediately. The rest of the computations can be done without a lock.
1815  */
1816  LWLockRelease(ProcArrayLock);
1817 
1818  if (in_recovery)
1819  {
1828  /* temp relations cannot be accessed in recovery */
1829  }
1830  else
1831  {
1832  /*
1833  * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age.
1834  *
1835  * vacuum_defer_cleanup_age provides some additional "slop" for the
1836  * benefit of hot standby queries on standby servers. This is quick
1837  * and dirty, and perhaps not all that useful unless the primary has a
1838  * predictable transaction rate, but it offers some protection when
1839  * there's no walsender connection. Note that we are assuming
1840  * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1841  * so guc.c should limit it to no more than the xidStopLimit threshold
1842  * in varsup.c. Also note that we intentionally don't apply
1843  * vacuum_defer_cleanup_age on standby servers.
1844  */
1857  /* defer doesn't apply to temp relations */
1858  }
1859 
1860  /*
1861  * Check whether there are replication slots requiring an older xmin.
1862  */
1867 
1868  /*
1869  * The only difference between catalog / data horizons is that the slot's
1870  * catalog xmin is applied to the catalog one (so catalogs can be accessed
1871  * for logical decoding). Initialize with data horizon, and then back up
1872  * further if necessary. Have to back up the shared horizon as well, since
1873  * that also can contain catalogs.
1874  */
1878  h->slot_catalog_xmin);
1881  h->slot_xmin);
1884  h->slot_catalog_xmin);
1885 
1886  /*
1887  * It's possible that slots / vacuum_defer_cleanup_age backed up the
1888  * horizons further than oldest_considered_running. Fix.
1889  */
1899 
1900  /*
1901  * shared horizons have to be at least as old as the oldest visible in
1902  * current db
1903  */
1908 
1909  /*
1910  * Horizons need to ensure that pg_subtrans access is still possible for
1911  * the relevant backends.
1912  */
1923  h->slot_xmin));
1926  h->slot_catalog_xmin));
1927 
1928  /* update approximate horizons with the computed horizons */
1930 }
1931 
1932 /*
1933  * Return the oldest XID for which deleted tuples must be preserved in the
1934  * passed table.
1935  *
1936  * If rel is not NULL the horizon may be considerably more recent than
1937  * otherwise (i.e. more tuples will be removable). In the NULL case a horizon
1938  * that is correct (but not optimal) for all relations will be returned.
1939  *
1940  * This is used by VACUUM to decide which deleted tuples must be preserved in
1941  * the passed in table.
1942  */
1945 {
1946  ComputeXidHorizonsResult horizons;
1947 
1948  ComputeXidHorizons(&horizons);
1949 
1950  /* select horizon appropriate for relation */
1951  if (rel == NULL || rel->rd_rel->relisshared)
1952  return horizons.shared_oldest_nonremovable;
1954  return horizons.catalog_oldest_nonremovable;
1955  else if (RELATION_IS_LOCAL(rel))
1956  return horizons.temp_oldest_nonremovable;
1957  else
1958  return horizons.data_oldest_nonremovable;
1959 }
1960 
1961 /*
1962  * Return the oldest transaction id any currently running backend might still
1963  * consider running. This should not be used for visibility / pruning
1964  * determinations (see GetOldestNonRemovableTransactionId()), but for
1965  * decisions such as how far pg_subtrans can be truncated.
1966  */
1969 {
1970  ComputeXidHorizonsResult horizons;
1971 
1972  ComputeXidHorizons(&horizons);
1973 
1974  return horizons.oldest_considered_running;
1975 }
1976 
1977 /*
1978  * Return the visibility horizons for a hot standby feedback message.
1979  */
1980 void
1982 {
1983  ComputeXidHorizonsResult horizons;
1984 
1985  ComputeXidHorizons(&horizons);
1986 
1987  /*
1988  * Don't want to use shared_oldest_nonremovable here, as that contains the
1989  * effect of replication slots' catalog_xmin. We want to send a separate
1990  * feedback for the catalog horizon, so the primary can remove data table
1991  * contents more aggressively.
1992  */
1993  *xmin = horizons.shared_oldest_nonremovable_raw;
1994  *catalog_xmin = horizons.slot_catalog_xmin;
1995 }
1996 
1997 /*
1998  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1999  *
2000  * Returns procArray->maxProcs. Exported because snapmgr.c needs it.
2001  */
2002 int
2004 {
2005  return procArray->maxProcs;
2006 }
2007 
2008 /*
2009  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
2010  *
2011  * Returns TOTAL_MAX_CACHED_SUBXIDS. Exported because snapmgr.c needs it.
2012  */
2013 int
2015 {
2016  return TOTAL_MAX_CACHED_SUBXIDS;
2017 }
2018 
2019 /*
2020  * Initialize old_snapshot_threshold specific parts of a newly built snapshot.
2021  */
2022 static void
2024 {
2026  {
2027  /*
2028  * If not using "snapshot too old" feature, fill related fields with
2029  * dummy values that don't require any locking.
2030  */
2031  snapshot->lsn = InvalidXLogRecPtr;
2032  snapshot->whenTaken = 0;
2033  }
2034  else
2035  {
2036  /*
2037  * Capture the current time and WAL stream location in case this
2038  * snapshot becomes old enough to need to fall back on the special
2039  * "old snapshot" logic.
2040  */
2041  snapshot->lsn = GetXLogInsertRecPtr();
2042  snapshot->whenTaken = GetSnapshotCurrentTimestamp();
2043  MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
2044  }
2045 }
2046 
2047 /*
2048  * Helper function for GetSnapshotData() that checks if the bulk of the
2049  * visibility information in the snapshot is still valid. If so, it updates
2050  * the fields that need to change and returns true. Otherwise it returns
2051  * false.
2052  *
2053  * This very likely can be evolved to not need ProcArrayLock held (at very
2054  * least in the case we already hold a snapshot), but that's for another day.
      *
      * The caller must hold ProcArrayLock (asserted). The snapshot is reusable
      * only if its snapXactCompletionCount is nonzero and equal to the current
      * ShmemVariableCache->xactCompletionCount. On success, the fields written
      * are TransactionXmin, MyProc->xmin, RecentXmin, and the snapshot's curcid,
      * active_count, regd_count and copied.
      *
      * NOTE(review): source lines 2057, 2090, 2094 and 2101 are absent from
      * this listing. In particular, check the original file for whether the
      * MyProc->xmin assignment below is guarded by a condition on line 2090.
2055  */
2056 static bool
2058 {
2059  uint64 curXactCompletionCount;
2060 
2061  Assert(LWLockHeldByMe(ProcArrayLock));
2062 
2063  if (unlikely(snapshot->snapXactCompletionCount == 0))
2064  return false;
2065 
2066  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2067  if (curXactCompletionCount != snapshot->snapXactCompletionCount)
2068  return false;
2069 
2070  /*
2071  * If the current xactCompletionCount is still the same as it was at the
2072  * time the snapshot was built, we can be sure that rebuilding the
2073  * contents of the snapshot the hard way would result in the same snapshot
2074  * contents:
2075  *
2076  * As explained in transam/README, the set of xids considered running by
2077  * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
2078  * contents only depend on transactions with xids and xactCompletionCount
2079  * is incremented whenever a transaction with an xid finishes (while
2080  * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
2081  * ensures we would detect if the snapshot would have changed.
2082  *
2083  * As the snapshot contents are the same as they were before, it is safe
2084  * to re-enter the snapshot's xmin into the PGPROC array. None of the rows
2085  * visible under the snapshot could already have been removed (that'd
2086  * require the set of running transactions to change) and it fulfills the
2087  * requirement that concurrent GetSnapshotData() calls yield the same
2088  * xmin.
2089  */
2091  MyProc->xmin = TransactionXmin = snapshot->xmin;
2092 
2093  RecentXmin = snapshot->xmin;
2095 
2096  snapshot->curcid = GetCurrentCommandId(false);
2097  snapshot->active_count = 0;
2098  snapshot->regd_count = 0;
2099  snapshot->copied = false;
2100 
2102 
2103  return true;
2104 }
2105 
2106 /*
2107  * GetSnapshotData -- returns information about running transactions.
2108  *
2109  * The returned snapshot includes xmin (lowest still-running xact ID),
2110  * xmax (highest completed xact ID + 1), and a list of running xact IDs
2111  * in the range xmin <= xid < xmax. It is used as follows:
2112  * All xact IDs < xmin are considered finished.
2113  * All xact IDs >= xmax are considered still running.
2114  * For an xact ID xmin <= xid < xmax, consult list to see whether
2115  * it is considered running or not.
2116  * This ensures that the set of transactions seen as "running" by the
2117  * current xact will not change after it takes the snapshot.
2118  *
2119  * All running top-level XIDs are included in the snapshot, except for lazy
2120  * VACUUM processes. We also try to include running subtransaction XIDs,
2121  * but since PGPROC has only a limited cache area for subxact XIDs, full
2122  * information may not be available. If we find any overflowed subxid arrays,
2123  * we have to mark the snapshot's subxid data as overflowed, and extra work
2124  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
2125  * in heapam_visibility.c).
2126  *
2127  * We also update the following backend-global variables:
2128  * TransactionXmin: the oldest xmin of any snapshot in use in the
2129  * current transaction (this is the same as MyProc->xmin).
2130  * RecentXmin: the xmin computed for the most recent snapshot. XIDs
2131  * older than this are known not running any more.
2132  *
2133  * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
2134  * for the benefit of the GlobalVisTest* family of functions.
2135  *
2136  * Note: this function should probably not be called with an argument that's
2137  * not statically allocated (see xip allocation below).
      *
      * The return value is the same snapshot pointer that was passed in, which
      * must not be NULL (asserted). ProcArrayLock is taken in shared mode and
      * released again before returning, on both the reuse path and the
      * full-rebuild path.
      *
      * NOTE(review): several source lines are absent from this listing, for
      * example the signature (2140), two declarations (2155-2156), the
      * allocation calls (2178 and 2185) and the right-hand side of the
      * assignment begun on line 2399. Consult the original file before relying
      * on the code as shown here.
2138  */
2139 Snapshot
2141 {
2142  ProcArrayStruct *arrayP = procArray;
2143  TransactionId *other_xids = ProcGlobal->xids;
2144  TransactionId xmin;
2145  TransactionId xmax;
2146  size_t count = 0;
2147  int subcount = 0;
2148  bool suboverflowed = false;
2149  FullTransactionId latest_completed;
2150  TransactionId oldestxid;
2151  int mypgxactoff;
2152  TransactionId myxid;
2153  uint64 curXactCompletionCount;
2154 
2157 
2158  Assert(snapshot != NULL);
2159 
2160  /*
2161  * Allocating space for maxProcs xids is usually overkill; numProcs would
2162  * be sufficient. But it seems better to do the malloc while not holding
2163  * the lock, so we can't look at numProcs. Likewise, we allocate much
2164  * more subxip storage than is probably needed.
2165  *
2166  * This does open a possibility for avoiding repeated malloc/free: since
2167  * maxProcs does not change at runtime, we can simply reuse the previous
2168  * xip arrays if any. (This relies on the fact that all callers pass
2169  * static SnapshotData structs.)
2170  */
2171  if (snapshot->xip == NULL)
2172  {
2173  /*
2174  * First call for this snapshot. Snapshot is same size whether or not
2175  * we are in recovery, see later comments.
2176  */
2177  snapshot->xip = (TransactionId *)
2179  if (snapshot->xip == NULL)
2180  ereport(ERROR,
2181  (errcode(ERRCODE_OUT_OF_MEMORY),
2182  errmsg("out of memory")));
2183  Assert(snapshot->subxip == NULL);
2184  snapshot->subxip = (TransactionId *)
2186  if (snapshot->subxip == NULL)
2187  ereport(ERROR,
2188  (errcode(ERRCODE_OUT_OF_MEMORY),
2189  errmsg("out of memory")));
2190  }
2191 
2192  /*
2193  * It is sufficient to get shared lock on ProcArrayLock, even if we are
2194  * going to set MyProc->xmin.
2195  */
2196  LWLockAcquire(ProcArrayLock, LW_SHARED);
2197 
2198  if (GetSnapshotDataReuse(snapshot))
2199  {
2200  LWLockRelease(ProcArrayLock);
2201  return snapshot;
2202  }
2203 
2204  latest_completed = ShmemVariableCache->latestCompletedXid;
2205  mypgxactoff = MyProc->pgxactoff;
2206  myxid = other_xids[mypgxactoff];
2207  Assert(myxid == MyProc->xid);
2208 
2209  oldestxid = ShmemVariableCache->oldestXid;
2210  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2211 
2212  /* xmax is always latestCompletedXid + 1 */
2213  xmax = XidFromFullTransactionId(latest_completed);
2214  TransactionIdAdvance(xmax);
2216 
2217  /* initialize xmin calculation with xmax */
2218  xmin = xmax;
2219 
2220  /* take own xid into account, saves a check inside the loop */
2221  if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
2222  xmin = myxid;
2223 
2225 
2226  if (!snapshot->takenDuringRecovery)
2227  {
2228  size_t numProcs = arrayP->numProcs;
2229  TransactionId *xip = snapshot->xip;
2230  int *pgprocnos = arrayP->pgprocnos;
2231  XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
2232  uint8 *allStatusFlags = ProcGlobal->statusFlags;
2233 
2234  /*
2235  * First collect set of pgxactoff/xids that need to be included in the
2236  * snapshot.
2237  */
2238  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
2239  {
2240  /* Fetch xid just once - see GetNewTransactionId */
2241  TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
2242  uint8 statusFlags;
2243 
2244  Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
2245 
2246  /*
2247  * If the transaction has no XID assigned, we can skip it; it
2248  * won't have sub-XIDs either.
2249  */
2250  if (likely(xid == InvalidTransactionId))
2251  continue;
2252 
2253  /*
2254  * We don't include our own XIDs (if any) in the snapshot. Our XID
2255  * needs to be included in the xmin computation, but we did so
2256  * outside the loop.
2257  */
2258  if (pgxactoff == mypgxactoff)
2259  continue;
2260 
2261  /*
2262  * The only way we are able to get here with a non-normal xid
2263  * is during bootstrap - with this backend using
2264  * BootstrapTransactionId. But the above test should filter
2265  * that out.
2266  */
2268 
2269  /*
2270  * If the XID is >= xmax, we can skip it; such transactions will
2271  * be treated as running anyway (and any sub-XIDs will also be >=
2272  * xmax).
2273  */
2274  if (!NormalTransactionIdPrecedes(xid, xmax))
2275  continue;
2276 
2277  /*
2278  * Skip over backends doing logical decoding which manages xmin
2279  * separately (check below) and ones running LAZY VACUUM.
2280  */
2281  statusFlags = allStatusFlags[pgxactoff];
2282  if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
2283  continue;
2284 
2285  if (NormalTransactionIdPrecedes(xid, xmin))
2286  xmin = xid;
2287 
2288  /* Add XID to snapshot. */
2289  xip[count++] = xid;
2290 
2291  /*
2292  * Save subtransaction XIDs if possible (if we've already
2293  * overflowed, there's no point). Note that the subxact XIDs must
2294  * be later than their parent, so no need to check them against
2295  * xmin. We could filter against xmax, but it seems better not to
2296  * do that much work while holding the ProcArrayLock.
2297  *
2298  * The other backend can add more subxids concurrently, but cannot
2299  * remove any. Hence it's important to fetch nxids just once.
2300  * Should be safe to use memcpy, though. (We needn't worry about
2301  * missing any xids added concurrently, because they must postdate
2302  * xmax.)
2303  *
2304  * Again, our own XIDs are not included in the snapshot.
2305  */
2306  if (!suboverflowed)
2307  {
2308 
2309  if (subxidStates[pgxactoff].overflowed)
2310  suboverflowed = true;
2311  else
2312  {
2313  int nsubxids = subxidStates[pgxactoff].count;
2314 
2315  if (nsubxids > 0)
2316  {
2317  int pgprocno = pgprocnos[pgxactoff];
2318  PGPROC *proc = &allProcs[pgprocno];
2319 
2320  pg_read_barrier(); /* pairs with GetNewTransactionId */
2321 
2322  memcpy(snapshot->subxip + subcount,
2323  (void *) proc->subxids.xids,
2324  nsubxids * sizeof(TransactionId));
2325  subcount += nsubxids;
2326  }
2327  }
2328  }
2329  }
2330  }
2331  else
2332  {
2333  /*
2334  * We're in hot standby, so get XIDs from KnownAssignedXids.
2335  *
2336  * We store all xids directly into subxip[]. Here's why:
2337  *
2338  * In recovery we don't know which xids are top-level and which are
2339  * subxacts, a design choice that greatly simplifies xid processing.
2340  *
2341  * It seems like we would want to try to put xids into xip[] only, but
2342  * that is fairly small. We would either need to make that bigger or
2343  * to increase the rate at which we WAL-log xid assignment; neither is
2344  * an appealing choice.
2345  *
2346  * We could try to store xids into xip[] first and then into subxip[]
2347  * if there are too many xids. That only works if the snapshot doesn't
2348  * overflow because we do not search subxip[] in that case. A simpler
2349  * way is to just store all xids in the subxact array because this is
2350  * by far the bigger array. We just leave the xip array empty.
2351  *
2352  * Either way we need to change the way XidInMVCCSnapshot() works
2353  * depending upon when the snapshot was taken, or change normal
2354  * snapshot processing so it matches.
2355  *
2356  * Note: It is possible for recovery to end before we finish taking
2357  * the snapshot, and for newly assigned transaction ids to be added to
2358  * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
2359  * those newly added transaction ids would be filtered away, so we
2360  * need not be concerned about them.
2361  */
2362  subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
2363  xmax);
2364 
2365  if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
2366  suboverflowed = true;
2367  }
2368 
2369 
2370  /*
2371  * Fetch into local variable while ProcArrayLock is held - the
2372  * LWLockRelease below is a barrier, ensuring this happens inside the
2373  * lock.
2374  */
2375  replication_slot_xmin = procArray->replication_slot_xmin;
2376  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
2377 
2379  MyProc->xmin = TransactionXmin = xmin;
2380 
2381  LWLockRelease(ProcArrayLock);
2382 
2383  /* maintain state for GlobalVis* */
2384  {
2385  TransactionId def_vis_xid;
2386  TransactionId def_vis_xid_data;
2387  FullTransactionId def_vis_fxid;
2388  FullTransactionId def_vis_fxid_data;
2389  FullTransactionId oldestfxid;
2390 
2391  /*
2392  * Converting oldestXid is only safe when xid horizon cannot advance,
2393  * i.e. holding locks. While we don't hold the lock anymore, all the
2394  * necessary data has been gathered with lock held.
2395  */
2396  oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
2397 
2398  /* apply vacuum_defer_cleanup_age */
2399  def_vis_xid_data =
2401 
2402  /* Check whether there's a replication slot requiring an older xmin. */
2403  def_vis_xid_data =
2404  TransactionIdOlder(def_vis_xid_data, replication_slot_xmin);
2405 
2406  /*
2407  * Rows in non-shared, non-catalog tables possibly could be vacuumed
2408  * if older than this xid.
2409  */
2410  def_vis_xid = def_vis_xid_data;
2411 
2412  /*
2413  * Check whether there's a replication slot requiring an older catalog
2414  * xmin.
2415  */
2416  def_vis_xid =
2417  TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
2418 
2419  def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
2420  def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
2421 
2422  /*
2423  * Check if we can increase upper bound. As a previous
2424  * GlobalVisUpdate() might have computed more aggressive values, don't
2425  * overwrite them if so.
2426  */
2427  GlobalVisSharedRels.definitely_needed =
2428  FullTransactionIdNewer(def_vis_fxid,
2429  GlobalVisSharedRels.definitely_needed);
2430  GlobalVisCatalogRels.definitely_needed =
2431  FullTransactionIdNewer(def_vis_fxid,
2432  GlobalVisCatalogRels.definitely_needed);
2433  GlobalVisDataRels.definitely_needed =
2434  FullTransactionIdNewer(def_vis_fxid_data,
2435  GlobalVisDataRels.definitely_needed);
2436  /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
2437  if (TransactionIdIsNormal(myxid))
2438  GlobalVisTempRels.definitely_needed =
2439  FullXidRelativeTo(latest_completed, myxid);
2440  else
2441  {
2442  GlobalVisTempRels.definitely_needed = latest_completed;
2443  FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed);
2444  }
2445 
2446  /*
2447  * Check if we know that we can initialize or increase the lower
2448  * bound. Currently the only cheap way to do so is to use
2449  * ShmemVariableCache->oldestXid as input.
2450  *
2451  * We should definitely be able to do better. We could e.g. put a
2452  * global lower bound value into ShmemVariableCache.
2453  */
2454  GlobalVisSharedRels.maybe_needed =
2455  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
2456  oldestfxid);
2457  GlobalVisCatalogRels.maybe_needed =
2458  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
2459  oldestfxid);
2460  GlobalVisDataRels.maybe_needed =
2461  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
2462  oldestfxid);
2463  /* accurate value known */
2464  GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed;
2465  }
2466 
2467  RecentXmin = xmin;
2469 
2470  snapshot->xmin = xmin;
2471  snapshot->xmax = xmax;
2472  snapshot->xcnt = count;
2473  snapshot->subxcnt = subcount;
2474  snapshot->suboverflowed = suboverflowed;
2475  snapshot->snapXactCompletionCount = curXactCompletionCount;
2476 
2477  snapshot->curcid = GetCurrentCommandId(false);
2478 
2479  /*
2480  * This is a new snapshot, so set both refcounts to zero, and mark it as
2481  * not copied in persistent memory.
2482  */
2483  snapshot->active_count = 0;
2484  snapshot->regd_count = 0;
2485  snapshot->copied = false;
2486 
2488 
2489  return snapshot;
2490 }
2491 
2492 /*
2493  * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
2494  *
2495  * This is called when installing a snapshot imported from another
2496  * transaction. To ensure that OldestXmin doesn't go backwards, we must
2497  * check that the source transaction is still running, and we'd better do
2498  * that atomically with installing the new xmin.
2499  *
      * xmin is the value to install; sourcevxid identifies the source
      * transaction by backendId and localTransactionId. A NULL sourcevxid is
      * reported as "not running": false is returned without taking
      * ProcArrayLock. Otherwise the proc array is searched under a shared
      * ProcArrayLock, and xmin is installed only if a matching proc in our
      * database advertises a normal xmin that precedes or equals it.
      *
2500  * Returns true if successful, false if source xact is no longer running.
2501  */
2502 bool
2504  VirtualTransactionId *sourcevxid)
2505 {
2506  bool result = false;
2507  ProcArrayStruct *arrayP = procArray;
2508  int index;
2509 
2511  if (!sourcevxid)
2512  return false;
2513 
2514  /* Get lock so source xact can't end while we're doing this */
2515  LWLockAcquire(ProcArrayLock, LW_SHARED);
2516 
2517  for (index = 0; index < arrayP->numProcs; index++)
2518  {
2519  int pgprocno = arrayP->pgprocnos[index];
2520  PGPROC *proc = &allProcs[pgprocno];
2521  int statusFlags = ProcGlobal->statusFlags[index];
2522  TransactionId xid;
2523 
2524  /* Ignore procs running LAZY VACUUM */
2525  if (statusFlags & PROC_IN_VACUUM)
2526  continue;
2527 
2528  /* We are only interested in the specific virtual transaction. */
2529  if (proc->backendId != sourcevxid->backendId)
2530  continue;
2531  if (proc->lxid != sourcevxid->localTransactionId)
2532  continue;
2533 
2534  /*
2535  * We check the transaction's database ID for paranoia's sake: if it's
2536  * in another DB then its xmin does not cover us. Caller should have
2537  * detected this already, so we just treat any funny cases as
2538  * "transaction not found".
2539  */
2540  if (proc->databaseId != MyDatabaseId)
2541  continue;
2542 
2543  /*
2544  * Likewise, let's just make real sure its xmin does cover us.
2545  */
2546  xid = UINT32_ACCESS_ONCE(proc->xmin);
2547  if (!TransactionIdIsNormal(xid) ||
2548  !TransactionIdPrecedesOrEquals(xid, xmin))
2549  continue;
2550 
2551  /*
2552  * We're good. Install the new xmin. As in GetSnapshotData, set
2553  * TransactionXmin too. (Note that because snapmgr.c called
2554  * GetSnapshotData first, we'll be overwriting a valid xmin here, so
2555  * we don't check that.)
2556  */
2557  MyProc->xmin = TransactionXmin = xmin;
2558 
2559  result = true;
2560  break;
2561  }
2562 
2563  LWLockRelease(ProcArrayLock);
2564 
2565  return result;
2566 }
2567 
2568 /*
2569  * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
2570  *
2571  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
2572  * PGPROC of the transaction from which we imported the snapshot, rather than
2573  * a virtual transaction ID.
2574  *
      * proc must not be NULL (asserted). The check and the assignment to
      * MyProc->xmin/TransactionXmin are both done while holding ProcArrayLock
      * in shared mode.
      *
2575  * Returns true if successful, false if source xact is no longer running.
2576  */
2577 bool
2579 {
2580  bool result = false;
2581  TransactionId xid;
2582 
2584  Assert(proc != NULL);
2585 
2586  /* Get lock so source xact can't end while we're doing this */
2587  LWLockAcquire(ProcArrayLock, LW_SHARED);
2588 
2589  /*
2590  * Be certain that the referenced PGPROC has an advertised xmin which is
2591  * no later than the one we're installing, so that the system-wide xmin
2592  * can't go backwards. Also, make sure it's running in the same database,
2593  * so that the per-database xmin cannot go backwards.
2594  */
2595  xid = UINT32_ACCESS_ONCE(proc->xmin);
2596  if (proc->databaseId == MyDatabaseId &&
2597  TransactionIdIsNormal(xid) &&
2598  TransactionIdPrecedesOrEquals(xid, xmin))
2599  {
2600  MyProc->xmin = TransactionXmin = xmin;
2601  result = true;
2602  }
2603 
2604  LWLockRelease(ProcArrayLock);
2605 
2606  return result;
2607 }
2608 
2609 /*
2610  * GetRunningTransactionData -- returns information about running transactions.
2611  *
2612  * Similar to GetSnapshotData but returns more information. We include
2613  * all PGPROCs with an assigned TransactionId, even VACUUM processes and
2614  * prepared transactions.
2615  *
2616  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
2617  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
2618  * array until the caller has WAL-logged this snapshot, and releases the
2619  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
2620  * lock is released.
      *
      * Both locks are taken in shared mode, ProcArrayLock first and XidGenLock
      * second.
2621  *
2622  * The returned data structure is statically allocated; caller should not
2623  * modify it, and must not assume it is valid past the next call.
2624  *
2625  * This is never executed during recovery so there is no need to look at
2626  * KnownAssignedXids.
2627  *
2628  * Dummy PGPROCs from prepared transactions are included, meaning that this
2629  * may return entries with duplicated TransactionId values coming from
2630  * a transaction that is finishing its prepare. Nothing is done about
2631  * duplicated entries here, so as not to hold ProcArrayLock longer than
      * necessary.
2632  *
2633  * We don't worry about updating other counters, we want to keep this as
2634  * simple as possible and leave GetSnapshotData() as the primary code for
2635  * that bookkeeping.
2636  *
2637  * Note that if any transaction has overflowed its cached subtransactions
2638  * then there is no real need to include any subtransactions.
      *
      * In the result, the xids array holds the top-level XIDs first, followed
      * by any collected subtransaction XIDs; xcnt counts only the former and
      * subxcnt only the latter.
      *
      * NOTE(review): several source lines are absent from this listing: the
      * return type and signature (2640-2641), line 2657, the allocation call
      * (2674), the right-hand sides of the latestCompletedXid and
      * oldestRunningXid assignments (2694 and 2696) and line 2787. Line 2787
      * presumably sets nextXid, which is asserted valid below -- confirm
      * against the original file.
2639  */
2642 {
2643  /* result workspace */
2644  static RunningTransactionsData CurrentRunningXactsData;
2645 
2646  ProcArrayStruct *arrayP = procArray;
2647  TransactionId *other_xids = ProcGlobal->xids;
2648  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
2649  TransactionId latestCompletedXid;
2650  TransactionId oldestRunningXid;
2651  TransactionId *xids;
2652  int index;
2653  int count;
2654  int subcount;
2655  bool suboverflowed;
2656 
2658 
2659  /*
2660  * Allocating space for maxProcs xids is usually overkill; numProcs would
2661  * be sufficient. But it seems better to do the malloc while not holding
2662  * the lock, so we can't look at numProcs. Likewise, we allocate much
2663  * more subxip storage than is probably needed.
2664  *
2665  * Should only be allocated in bgwriter, since only ever executed during
2666  * checkpoints.
2667  */
2668  if (CurrentRunningXacts->xids == NULL)
2669  {
2670  /*
2671  * First call
2672  */
2673  CurrentRunningXacts->xids = (TransactionId *)
2675  if (CurrentRunningXacts->xids == NULL)
2676  ereport(ERROR,
2677  (errcode(ERRCODE_OUT_OF_MEMORY),
2678  errmsg("out of memory")));
2679  }
2680 
2681  xids = CurrentRunningXacts->xids;
2682 
2683  count = subcount = 0;
2684  suboverflowed = false;
2685 
2686  /*
2687  * Ensure that no xids enter or leave the procarray while we obtain
2688  * snapshot.
2689  */
2690  LWLockAcquire(ProcArrayLock, LW_SHARED);
2691  LWLockAcquire(XidGenLock, LW_SHARED);
2692 
2693  latestCompletedXid =
2695  oldestRunningXid =
2697 
2698  /*
2699  * Spin over procArray collecting all xids
2700  */
2701  for (index = 0; index < arrayP->numProcs; index++)
2702  {
2703  TransactionId xid;
2704 
2705  /* Fetch xid just once - see GetNewTransactionId */
2706  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2707 
2708  /*
2709  * We don't need to store transactions that don't have a TransactionId
2710  * yet because they will not show as running on a standby server.
2711  */
2712  if (!TransactionIdIsValid(xid))
2713  continue;
2714 
2715  /*
2716  * Be careful not to exclude any xids before calculating the values of
2717  * oldestRunningXid and suboverflowed, since these are used to clean
2718  * up transaction information held on standbys.
2719  */
2720  if (TransactionIdPrecedes(xid, oldestRunningXid))
2721  oldestRunningXid = xid;
2722 
2723  if (ProcGlobal->subxidStates[index].overflowed)
2724  suboverflowed = true;
2725 
2726  /*
2727  * If we wished to exclude xids this would be the right place for it.
2728  * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2729  * but they do during truncation at the end when they get the lock and
2730  * truncate, so it is not much of a problem to include them if they
2731  * are seen and it is cleaner to include them.
2732  */
2733 
2734  xids[count++] = xid;
2735  }
2736 
2737  /*
2738  * Spin over procArray collecting all subxids, but only if there hasn't
2739  * been a suboverflow.
2740  */
2741  if (!suboverflowed)
2742  {
2743  XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
2744 
2745  for (index = 0; index < arrayP->numProcs; index++)
2746  {
2747  int pgprocno = arrayP->pgprocnos[index];
2748  PGPROC *proc = &allProcs[pgprocno];
2749  int nsubxids;
2750 
2751  /*
2752  * Save subtransaction XIDs. Other backends can't add or remove
2753  * entries while we're holding XidGenLock.
2754  */
2755  nsubxids = other_subxidstates[index].count;
2756  if (nsubxids > 0)
2757  {
2758  /* barrier not really required, as XidGenLock is held, but ... */
2759  pg_read_barrier(); /* pairs with GetNewTransactionId */
2760 
2761  memcpy(&xids[count], (void *) proc->subxids.xids,
2762  nsubxids * sizeof(TransactionId));
2763  count += nsubxids;
2764  subcount += nsubxids;
2765 
2766  /*
2767  * Top-level XID of a transaction is always less than any of
2768  * its subxids, so we don't need to check if any of the
2769  * subxids are smaller than oldestRunningXid
2770  */
2771  }
2772  }
2773  }
2774 
2775  /*
2776  * It's important *not* to include the limits set by slots here because
2777  * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2778  * were to be included here the initial value could never increase because
2779  * of a circular dependency where slots only increase their limits when
2780  * running xacts increases oldestRunningXid and running xacts only
2781  * increases if slots do.
2782  */
2783 
2784  CurrentRunningXacts->xcnt = count - subcount;
2785  CurrentRunningXacts->subxcnt = subcount;
2786  CurrentRunningXacts->subxid_overflow = suboverflowed;
2788  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2789  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2790 
2791  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2792  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2793  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2794 
2795  /* We don't release the locks here, the caller is responsible for that */
2796 
2797  return CurrentRunningXacts;
2798 }
2799 
2800 /*
2801  * GetOldestActiveTransactionId()
2802  *
2803  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2804  * all PGPROCs with an assigned TransactionId, even VACUUM processes.
2805  * We look at all databases, though there is no need to include WALSender
2806  * since this has no effect on hot standby conflicts.
2807  *
2808  * This is never executed during recovery so there is no need to look at
2809  * KnownAssignedXids.
2810  *
2811  * We don't worry about updating other counters, we want to keep this as
2812  * simple as possible and leave GetSnapshotData() as the primary code for
2813  * that bookkeeping.
      *
      * XidGenLock and ProcArrayLock are each taken in shared mode, one after
      * the other; XidGenLock is released before ProcArrayLock is acquired, and
      * neither is held on return.
      *
      * NOTE(review): the return type and signature (source lines 2815-2816),
      * line 2823 and line 2833 are absent from this listing. Line 2833
      * presumably initializes oldestRunningXid from nextXid, as the comment
      * inside the function describes -- confirm against the original file.
2814  */
2817 {
2818  ProcArrayStruct *arrayP = procArray;
2819  TransactionId *other_xids = ProcGlobal->xids;
2820  TransactionId oldestRunningXid;
2821  int index;
2822 
2824 
2825  /*
2826  * Read nextXid, as the upper bound of what's still active.
2827  *
2828  * Reading a TransactionId is atomic, but we must grab the lock to make
2829  * sure that all XIDs < nextXid are already present in the proc array (or
2830  * have already completed), when we spin over it.
2831  */
2832  LWLockAcquire(XidGenLock, LW_SHARED);
2834  LWLockRelease(XidGenLock);
2835 
2836  /*
2837  * Spin over procArray looking for the oldest top-level xid. Subxids are
      * not examined; see the comment at the end of the loop body.
2838  */
2839  LWLockAcquire(ProcArrayLock, LW_SHARED);
2840  for (index = 0; index < arrayP->numProcs; index++)
2841  {
2842  TransactionId xid;
2843 
2844  /* Fetch xid just once - see GetNewTransactionId */
2845  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2846 
2847  if (!TransactionIdIsNormal(xid))
2848  continue;
2849 
2850  if (TransactionIdPrecedes(xid, oldestRunningXid))
2851  oldestRunningXid = xid;
2852 
2853  /*
2854  * Top-level XID of a transaction is always less than any of its
2855  * subxids, so we don't need to check if any of the subxids are
2856  * smaller than oldestRunningXid
2857  */
2858  }
2859  LWLockRelease(ProcArrayLock);
2860 
2861  return oldestRunningXid;
2862 }
2863 
2864 /*
2865  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2866  *
2867  * Returns the oldest xid that we can guarantee not to have been affected by
2868  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2869  * transaction aborted. Note that the value can (and most of the time will) be
2870  * much more conservative than what really has been affected by vacuum, but we
2871  * currently don't have better data available.
2872  *
2873  * This is useful to initialize the cutoff xid after which a new changeset
2874  * extraction replication slot can start decoding changes.
2875  *
2876  * Must be called with ProcArrayLock held either shared or exclusively,
2877  * although most callers will want to use exclusive mode since it is expected
2878  * that the caller will immediately use the xid to peg the xmin horizon.
      *
      * catalogOnly controls whether the slots' catalog xmin
      * (procArray->replication_slot_catalog_xmin) may also be used to lower the
      * starting value; the slots' general xmin is considered either way.
      *
      * XidGenLock is acquired here in shared mode and released before
      * returning; ProcArrayLock is left as the caller held it.
      *
      * NOTE(review): the return type and signature (source lines 2880-2881),
      * line 2899, and parts of the two slot conditions (2909 and 2914-2915)
      * are absent from this listing. Line 2899 presumably initializes
      * oldestSafeXid from nextXid, as the comment before it describes --
      * confirm against the original file.
2879  */
2882 {
2883  ProcArrayStruct *arrayP = procArray;
2884  TransactionId oldestSafeXid;
2885  int index;
2886  bool recovery_in_progress = RecoveryInProgress();
2887 
2888  Assert(LWLockHeldByMe(ProcArrayLock));
2889 
2890  /*
2891  * Acquire XidGenLock, so no transactions can acquire an xid while we're
2892  * running. If no transaction with xid were running concurrently a new xid
2893  * could influence the RecentXmin et al.
2894  *
2895  * We initialize the computation to nextXid since that's guaranteed to be
2896  * a safe, albeit pessimal, value.
2897  */
2898  LWLockAcquire(XidGenLock, LW_SHARED);
2900 
2901  /*
2902  * If there's already a slot pegging the xmin horizon, we can start with
2903  * that value, it's guaranteed to be safe since it's computed by this
2904  * routine initially and has been enforced since. We can always use the
2905  * slot's general xmin horizon, but the catalog horizon is only usable
2906  * when only catalog data is going to be looked at.
2907  */
2908  if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2910  oldestSafeXid))
2911  oldestSafeXid = procArray->replication_slot_xmin;
2912 
2913  if (catalogOnly &&
2916  oldestSafeXid))
2917  oldestSafeXid = procArray->replication_slot_catalog_xmin;
2918 
2919  /*
2920  * If we're not in recovery, we walk over the procarray and collect the
2921  * lowest xid. Since we're called with ProcArrayLock held and have
2922  * acquired XidGenLock, no entries can vanish concurrently, since
2923  * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
2924  * with ProcArrayLock held.
2925  *
2926  * In recovery we can't lower the safe value besides what we've computed
2927  * above, so we'll have to wait a bit longer there. We unfortunately can
2928  * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2929  * machinery can miss values and return an older value than is safe.
2930  */
2931  if (!recovery_in_progress)
2932  {
2933  TransactionId *other_xids = ProcGlobal->xids;
2934 
2935  /*
2936  * Spin over procArray collecting min(ProcGlobal->xids[i])
2937  */
2938  for (index = 0; index < arrayP->numProcs; index++)
2939  {
2940  TransactionId xid;
2941 
2942  /* Fetch xid just once - see GetNewTransactionId */
2943  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2944 
2945  if (!TransactionIdIsNormal(xid))
2946  continue;
2947 
2948  if (TransactionIdPrecedes(xid, oldestSafeXid))
2949  oldestSafeXid = xid;
2950  }
2951  }
2952 
2953  LWLockRelease(XidGenLock);
2954 
2955  return oldestSafeXid;
2956 }
2957 
2958 /*
2959  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2960  * delaying checkpoint because they have critical actions in progress.
2961  *
2962  * Constructs an array of VXIDs of transactions that are currently in commit
2963  * critical sections, as shown by having delayChkpt set in their PGPROC.
      * Procs with delayChkpt set but without a valid VXID are skipped.
2964  *
2965  * Returns a palloc'd array that should be freed by the caller.
2966  * *nvxids is the number of valid entries.
      * The array is sized for procArray->maxProcs entries, so it may be larger
      * than *nvxids.
2967  *
2968  * Note that because backends set or clear delayChkpt without holding any lock,
2969  * the result is somewhat indeterminate, but we don't really care. Even in
2970  * a multiprocessor with delayed writes to shared memory, it should be certain
2971  * that setting of delayChkpt will propagate to shared memory when the backend
2972  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2973  * it's already inserted its commit record. Whether it takes a little while
2974  * for clearing of delayChkpt to propagate is unimportant for correctness.
2975  */
2978 {
2979  VirtualTransactionId *vxids;
2980  ProcArrayStruct *arrayP = procArray;
2981  int count = 0;
2982  int index;
2983 
2984  /* allocate what's certainly enough result space */
2985  vxids = (VirtualTransactionId *)
2986  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2987 
2988  LWLockAcquire(ProcArrayLock, LW_SHARED);
2989 
2990  for (index = 0; index < arrayP->numProcs; index++)
2991  {
2992  int pgprocno = arrayP->pgprocnos[index];
2993  PGPROC *proc = &allProcs[pgprocno];
2994 
2995  if (proc->delayChkpt)
2996  {
2997  VirtualTransactionId vxid;
2998 
2999  GET_VXID_FROM_PGPROC(vxid, *proc);
3000  if (VirtualTransactionIdIsValid(vxid))
3001  vxids[count++] = vxid;
3002  }
3003  }
3004 
3005  LWLockRelease(ProcArrayLock);
3006 
3007  *nvxids = count;
3008  return vxids;
3009 }
3010 
3011 /*
3012  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
3013  *
3014  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
3015  * of the specified VXIDs are still in critical sections of code.
3016  *
3017  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
3018  * those numbers should be small enough for it not to be a problem.
3019  */
3020 bool
3022 {
3023  bool result = false;
3024  ProcArrayStruct *arrayP = procArray;
3025  int index;
3026 
3027  LWLockAcquire(ProcArrayLock, LW_SHARED);
3028 
3029  for (index = 0; index < arrayP->numProcs; index++)
3030  {
3031  int pgprocno = arrayP->pgprocnos[index];
3032  PGPROC *proc = &allProcs[pgprocno];
3033  VirtualTransactionId vxid;
3034 
3035  GET_VXID_FROM_PGPROC(vxid, *proc);
3036 
3037  if (proc->delayChkpt && VirtualTransactionIdIsValid(vxid))
3038  {
3039  int i;
3040 
3041  for (i = 0; i < nvxids; i++)
3042  {
3043  if (VirtualTransactionIdEquals(vxid, vxids[i]))
3044  {
3045  result = true;
3046  break;
3047  }
3048  }
3049  if (result)
3050  break;
3051  }
3052  }
3053 
3054  LWLockRelease(ProcArrayLock);
3055 
3056  return result;
3057 }
3058 
3059 /*
3060  * BackendPidGetProc -- get a backend's PGPROC given its PID
3061  *
3062  * Returns NULL if not found. Note that it is up to the caller to be
3063  * sure that the question remains meaningful for long enough for the
3064  * answer to be used ...
3065  */
3066 PGPROC *
3068 {
3069  PGPROC *result;
3070 
3071  if (pid == 0) /* never match dummy PGPROCs */
3072  return NULL;
3073 
3074  LWLockAcquire(ProcArrayLock, LW_SHARED);
3075 
3076  result = BackendPidGetProcWithLock(pid);
3077 
3078  LWLockRelease(ProcArrayLock);
3079 
3080  return result;
3081 }
3082 
3083 /*
3084  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
3085  *
3086  * Same as above, except caller must be holding ProcArrayLock. The found
3087  * entry, if any, can be assumed to be valid as long as the lock remains held.
3088  */
3089 PGPROC *
3091 {
3092  PGPROC *result = NULL;
3093  ProcArrayStruct *arrayP = procArray;
3094  int index;
3095 
3096  if (pid == 0) /* never match dummy PGPROCs */
3097  return NULL;
3098 
3099  for (index = 0; index < arrayP->numProcs; index++)
3100  {
3101  PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
3102 
3103  if (proc->pid == pid)
3104  {
3105  result = proc;
3106  break;
3107  }
3108  }
3109 
3110  return result;
3111 }
3112 
3113 /*
3114  * BackendXidGetPid -- get a backend's pid given its XID
3115  *
3116  * Returns 0 if not found or it's a prepared transaction. Note that
3117  * it is up to the caller to be sure that the question remains
3118  * meaningful for long enough for the answer to be used ...
3119  *
3120  * Only main transaction Ids are considered. This function is mainly
3121  * useful for determining what backend owns a lock.
3122  *
3123  * Beware that not every xact has an XID assigned. However, as long as you
3124  * only call this using an XID found on disk, you're safe.
3125  */
3126 int
3128 {
3129  int result = 0;
3130  ProcArrayStruct *arrayP = procArray;
3131  TransactionId *other_xids = ProcGlobal->xids;
3132  int index;
3133 
3134  if (xid == InvalidTransactionId) /* never match invalid xid */
3135  return 0;
3136 
3137  LWLockAcquire(ProcArrayLock, LW_SHARED);
3138 
3139  for (index = 0; index < arrayP->numProcs; index++)
3140  {
3141  int pgprocno = arrayP->pgprocnos[index];
3142  PGPROC *proc = &allProcs[pgprocno];
3143 
3144  if (other_xids[index] == xid)
3145  {
3146  result = proc->pid;
3147  break;
3148  }
3149  }
3150 
3151  LWLockRelease(ProcArrayLock);
3152 
3153  return result;
3154 }
3155 
/*
 * IsBackendPid -- is a given pid a running backend
 *
 * Returns true if BackendPidGetProc finds a PGPROC for the pid.
 *
 * This is not called by the backend, but is called by external modules.
 */
bool
IsBackendPid(int pid)
{
	return (BackendPidGetProc(pid) != NULL);
}
3166 
3167 
3168 /*
3169  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
3170  *
3171  * The array is palloc'd. The number of valid entries is returned into *nvxids.
3172  *
3173  * The arguments allow filtering the set of VXIDs returned. Our own process
3174  * is always skipped. In addition:
3175  * If limitXmin is not InvalidTransactionId, skip processes with
3176  * xmin > limitXmin.
3177  * If excludeXmin0 is true, skip processes with xmin = 0.
3178  * If allDbs is false, skip processes attached to other databases.
3179  * If excludeVacuum isn't zero, skip processes for which
3180  * (statusFlags & excludeVacuum) is not zero.
3181  *
3182  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
3183  * allow skipping backends whose oldest live snapshot is no older than
3184  * some snapshot we have. Since we examine the procarray with only shared
3185  * lock, there are race conditions: a backend could set its xmin just after
3186  * we look. Indeed, on multiprocessors with weak memory ordering, the
3187  * other backend could have set its xmin *before* we look. We know however
3188  * that such a backend must have held shared ProcArrayLock overlapping our
3189  * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
3190  * any snapshot the other backend is taking concurrently with our scan cannot
3191  * consider any transactions as still running that we think are committed
3192  * (since backends must hold ProcArrayLock exclusive to commit).
3193  */
3195 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
3196  bool allDbs, int excludeVacuum,
3197  int *nvxids)
3198 {
3199  VirtualTransactionId *vxids;
3200  ProcArrayStruct *arrayP = procArray;
3201  int count = 0;
3202  int index;
3203 
3204  /* allocate what's certainly enough result space */
3205  vxids = (VirtualTransactionId *)
3206  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
3207 
3208  LWLockAcquire(ProcArrayLock, LW_SHARED);
3209 
3210  for (index = 0; index < arrayP->numProcs; index++)
3211  {
3212  int pgprocno = arrayP->pgprocnos[index];
3213  PGPROC *proc = &allProcs[pgprocno];
3214  uint8 statusFlags = ProcGlobal->statusFlags[index];
3215 
3216  if (proc == MyProc)
3217  continue;
3218 
3219  if (excludeVacuum & statusFlags)
3220  continue;
3221 
3222  if (allDbs || proc->databaseId == MyDatabaseId)
3223  {
3224  /* Fetch xmin just once - might change on us */
3225  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3226 
3227  if (excludeXmin0 && !TransactionIdIsValid(pxmin))
3228  continue;
3229 
3230  /*
3231  * InvalidTransactionId precedes all other XIDs, so a proc that
3232  * hasn't set xmin yet will not be rejected by this test.
3233  */
3234  if (!TransactionIdIsValid(limitXmin) ||
3235  TransactionIdPrecedesOrEquals(pxmin, limitXmin))
3236  {
3237  VirtualTransactionId vxid;
3238 
3239  GET_VXID_FROM_PGPROC(vxid, *proc);
3240  if (VirtualTransactionIdIsValid(vxid))
3241  vxids[count++] = vxid;
3242  }
3243  }
3244  }
3245 
3246  LWLockRelease(ProcArrayLock);
3247 
3248  *nvxids = count;
3249  return vxids;
3250 }
3251 
3252 /*
3253  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
3254  *
3255  * Usage is limited to conflict resolution during recovery on standby servers.
3256  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
3257  * in cases where we cannot accurately determine a value for latestRemovedXid.
3258  *
3259  * If limitXmin is InvalidTransactionId then we want to kill everybody,
3260  * so we're not worried if they have a snapshot or not, nor does it really
3261  * matter what type of lock we hold.
3262  *
3263  * All callers that are checking xmins always now supply a valid and useful
3264  * value for limitXmin. The limitXmin is always lower than the lowest
3265  * numbered KnownAssignedXid that is not already a FATAL error. This is
3266  * because we only care about cleanup records that are cleaning up tuple
3267  * versions from committed transactions. In that case they will only occur
3268  * at the point where the record is less than the lowest running xid. That
3269  * allows us to say that if any backend takes a snapshot concurrently with
3270  * us then the conflict assessment made here would never include the snapshot
3271  * that is being derived. So we take LW_SHARED on the ProcArray and allow
3272  * concurrent snapshots when limitXmin is valid. We might think about adding
3273  * Assert(limitXmin < lowest(KnownAssignedXids))
3274  * but that would not be true in the case of FATAL errors lagging in array,
3275  * but we already know those are bogus anyway, so we skip that test.
3276  *
3277  * If dbOid is valid we skip backends attached to other databases.
3278  *
3279  * Be careful to *not* pfree the result from this function. We reuse
3280  * this array sufficiently often that we use malloc for the result.
3281  */
3284 {
3285  static VirtualTransactionId *vxids;
3286  ProcArrayStruct *arrayP = procArray;
3287  int count = 0;
3288  int index;
3289 
3290  /*
3291  * If first time through, get workspace to remember main XIDs in. We
3292  * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
3293  * result space, remembering room for a terminator.
3294  */
3295  if (vxids == NULL)
3296  {
3297  vxids = (VirtualTransactionId *)
3298  malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
3299  if (vxids == NULL)
3300  ereport(ERROR,
3301  (errcode(ERRCODE_OUT_OF_MEMORY),
3302  errmsg("out of memory")));
3303  }
3304 
3305  LWLockAcquire(ProcArrayLock, LW_SHARED);
3306 
3307  for (index = 0; index < arrayP->numProcs; index++)
3308  {
3309  int pgprocno = arrayP->pgprocnos[index];
3310  PGPROC *proc = &allProcs[pgprocno];
3311 
3312  /* Exclude prepared transactions */
3313  if (proc->pid == 0)
3314  continue;
3315 
3316  if (!OidIsValid(dbOid) ||
3317  proc->databaseId == dbOid)
3318  {
3319  /* Fetch xmin just once - can't change on us, but good coding */
3320  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3321 
3322  /*
3323  * We ignore an invalid pxmin because this means that backend has
3324  * no snapshot currently. We hold a Share lock to avoid contention
3325  * with users taking snapshots. That is not a problem because the
3326  * current xmin is always at least one higher than the latest
3327  * removed xid, so any new snapshot would never conflict with the
3328  * test here.
3329  */
3330  if (!TransactionIdIsValid(limitXmin) ||
3331  (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
3332  {
3333  VirtualTransactionId vxid;
3334 
3335  GET_VXID_FROM_PGPROC(vxid, *proc);
3336  if (VirtualTransactionIdIsValid(vxid))
3337  vxids[count++] = vxid;
3338  }
3339  }
3340  }
3341 
3342  LWLockRelease(ProcArrayLock);
3343 
3344  /* add the terminator */
3345  vxids[count].backendId = InvalidBackendId;
3347 
3348  return vxids;
3349 }
3350 
3351 /*
3352  * CancelVirtualTransaction - used in recovery conflict processing
3353  *
3354  * Returns pid of the process signaled, or 0 if not found.
3355  */
3356 pid_t
3358 {
3359  return SignalVirtualTransaction(vxid, sigmode, true);
3360 }
3361 
3362 pid_t
3364  bool conflictPending)
3365 {
3366  ProcArrayStruct *arrayP = procArray;
3367  int index;
3368  pid_t pid = 0;
3369 
3370  LWLockAcquire(ProcArrayLock, LW_SHARED);
3371 
3372  for (index = 0; index < arrayP->numProcs; index++)
3373  {
3374  int pgprocno = arrayP->pgprocnos[index];
3375  PGPROC *proc = &allProcs[pgprocno];
3376  VirtualTransactionId procvxid;
3377 
3378  GET_VXID_FROM_PGPROC(procvxid, *proc);
3379 
3380  if (procvxid.backendId == vxid.backendId &&
3381  procvxid.localTransactionId == vxid.localTransactionId)
3382  {
3383  proc->recoveryConflictPending = conflictPending;
3384  pid = proc->pid;
3385  if (pid != 0)
3386  {
3387  /*
3388  * Kill the pid if it's still here. If not, that's what we
3389  * wanted so ignore any errors.
3390  */
3391  (void) SendProcSignal(pid, sigmode, vxid.backendId);
3392  }
3393  break;
3394  }
3395  }
3396 
3397  LWLockRelease(ProcArrayLock);
3398 
3399  return pid;
3400 }
3401 
3402 /*
3403  * MinimumActiveBackends --- count backends (other than myself) that are
3404  * in active transactions. Return true if the count exceeds the
3405  * minimum threshold passed. This is used as a heuristic to decide if
3406  * a pre-XLOG-flush delay is worthwhile during commit.
3407  *
3408  * Do not count backends that are blocked waiting for locks, since they are
3409  * not going to get to run until someone else commits.
3410  */
3411 bool
3413 {
3414  ProcArrayStruct *arrayP = procArray;
3415  int count = 0;
3416  int index;
3417 
3418  /* Quick short-circuit if no minimum is specified */
3419  if (min == 0)
3420  return true;
3421 
3422  /*
3423  * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
3424  * bogus, but since we are only testing fields for zero or nonzero, it
3425  * should be OK. The result is only used for heuristic purposes anyway...
3426  */
3427  for (index = 0; index < arrayP->numProcs; index++)
3428  {
3429  int pgprocno = arrayP->pgprocnos[index];
3430  PGPROC *proc = &allProcs[pgprocno];
3431 
3432  /*
3433  * Since we're not holding a lock, need to be prepared to deal with
3434  * garbage, as someone could have incremented numProcs but not yet
3435  * filled the structure.
3436  *
3437  * If someone just decremented numProcs, 'proc' could also point to a
3438  * PGPROC entry that's no longer in the array. It still points to a
3439  * PGPROC struct, though, because freed PGPROC entries just go to the
3440  * free list and are recycled. Its contents are nonsense in that case,
3441  * but that's acceptable for this function.
3442  */
3443  if (pgprocno == -1)
3444  continue; /* do not count deleted entries */
3445  if (proc == MyProc)
3446  continue; /* do not count myself */
3447  if (proc->xid == InvalidTransactionId)
3448  continue; /* do not count if no XID assigned */
3449  if (proc->pid == 0)
3450  continue; /* do not count prepared xacts */
3451  if (proc->waitLock != NULL)
3452  continue; /* do not count if blocked on a lock */
3453  count++;
3454  if (count >= min)
3455  break;
3456  }
3457 
3458  return count >= min;
3459 }
3460 
3461 /*
3462  * CountDBBackends --- count backends that are using specified database
3463  */
3464 int
3466 {
3467  ProcArrayStruct *arrayP = procArray;
3468  int count = 0;
3469  int index;
3470 
3471  LWLockAcquire(ProcArrayLock, LW_SHARED);
3472 
3473  for (index = 0; index < arrayP->numProcs; index++)
3474  {
3475  int pgprocno = arrayP->pgprocnos[index];
3476  PGPROC *proc = &allProcs[pgprocno];
3477 
3478  if (proc->pid == 0)
3479  continue; /* do not count prepared xacts */
3480  if (!OidIsValid(databaseid) ||
3481  proc->databaseId == databaseid)
3482  count++;
3483  }
3484 
3485  LWLockRelease(ProcArrayLock);
3486 
3487  return count;
3488 }
3489 
3490 /*
3491  * CountDBConnections --- counts database backends ignoring any background
3492  * worker processes
3493  */
3494 int
3496 {
3497  ProcArrayStruct *arrayP = procArray;
3498  int count = 0;
3499  int index;
3500 
3501  LWLockAcquire(ProcArrayLock, LW_SHARED);
3502 
3503  for (index = 0; index < arrayP->numProcs; index++)
3504  {
3505  int pgprocno = arrayP->pgprocnos[index];
3506  PGPROC *proc = &allProcs[pgprocno];
3507 
3508  if (proc->pid == 0)
3509  continue; /* do not count prepared xacts */
3510  if (proc->isBackgroundWorker)
3511  continue; /* do not count background workers */
3512  if (!OidIsValid(databaseid) ||
3513  proc->databaseId == databaseid)
3514  count++;
3515  }
3516 
3517  LWLockRelease(ProcArrayLock);
3518 
3519  return count;
3520 }
3521 
3522 /*
3523  * CancelDBBackends --- cancel backends that are using specified database
3524  */
3525 void
3526 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
3527 {
3528  ProcArrayStruct *arrayP = procArray;
3529  int index;
3530 
3531  /* tell all backends to die */
3532  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3533 
3534  for (index = 0; index < arrayP->numProcs; index++)
3535  {
3536  int pgprocno = arrayP->pgprocnos[index];
3537  PGPROC *proc = &allProcs[pgprocno];
3538 
3539  if (databaseid == InvalidOid || proc->databaseId == databaseid)
3540  {
3541  VirtualTransactionId procvxid;
3542  pid_t pid;
3543 
3544  GET_VXID_FROM_PGPROC(procvxid, *proc);
3545 
3546  proc->recoveryConflictPending = conflictPending;
3547  pid = proc->pid;
3548  if (pid != 0)
3549  {
3550  /*
3551  * Kill the pid if it's still here. If not, that's what we
3552  * wanted so ignore any errors.
3553  */
3554  (void) SendProcSignal(pid, sigmode, procvxid.backendId);
3555  }
3556  }
3557  }
3558 
3559  LWLockRelease(ProcArrayLock);
3560 }
3561 
3562 /*
3563  * CountUserBackends --- count backends that are used by specified user
3564  */
3565 int
3567 {
3568  ProcArrayStruct *arrayP = procArray;
3569  int count = 0;
3570  int index;
3571 
3572  LWLockAcquire(ProcArrayLock, LW_SHARED);
3573 
3574  for (index = 0; index < arrayP->numProcs; index++)
3575  {
3576  int pgprocno = arrayP->pgprocnos[index];
3577  PGPROC *proc = &allProcs[pgprocno];
3578 
3579  if (proc->pid == 0)
3580  continue; /* do not count prepared xacts */
3581  if (proc->isBackgroundWorker)
3582  continue; /* do not count background workers */
3583  if (proc->roleId == roleid)
3584  count++;
3585  }
3586 
3587  LWLockRelease(ProcArrayLock);
3588 
3589  return count;
3590 }
3591 
3592 /*
3593  * CountOtherDBBackends -- check for other backends running in the given DB
3594  *
3595  * If there are other backends in the DB, we will wait a maximum of 5 seconds
3596  * for them to exit. Autovacuum backends are encouraged to exit early by
3597  * sending them SIGTERM, but normal user backends are just waited for.
3598  *
3599  * The current backend is always ignored; it is caller's responsibility to
3600  * check whether the current backend uses the given DB, if it's important.
3601  *
3602  * Returns true if there are (still) other backends in the DB, false if not.
3603  * Also, *nbackends and *nprepared are set to the number of other backends
3604  * and prepared transactions in the DB, respectively.
3605  *
3606  * This function is used to interlock DROP DATABASE and related commands
3607  * against there being any active backends in the target DB --- dropping the
3608  * DB while active backends remain would be a Bad Thing. Note that we cannot
3609  * detect here the possibility of a newly-started backend that is trying to
3610  * connect to the doomed database, so additional interlocking is needed during
3611  * backend startup. The caller should normally hold an exclusive lock on the
3612  * target DB before calling this, which is one reason we mustn't wait
3613  * indefinitely.
3614  */
3615 bool
3616 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
3617 {
3618  ProcArrayStruct *arrayP = procArray;
3619 
3620 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
3621  int autovac_pids[MAXAUTOVACPIDS];
3622  int tries;
3623 
3624  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
3625  for (tries = 0; tries < 50; tries++)
3626  {
3627  int nautovacs = 0;
3628  bool found = false;
3629  int index;
3630 
3632 
3633  *nbackends = *nprepared = 0;
3634 
3635  LWLockAcquire(ProcArrayLock, LW_SHARED);
3636 
3637  for (index = 0; index < arrayP->numProcs; index++)
3638  {
3639  int pgprocno = arrayP->pgprocnos[index];
3640  PGPROC *proc = &allProcs[pgprocno];
3641  uint8 statusFlags = ProcGlobal->statusFlags[index];
3642 
3643  if (proc->databaseId != databaseId)
3644  continue;
3645  if (proc == MyProc)
3646  continue;
3647 
3648  found = true;
3649 
3650  if (proc->pid == 0)
3651  (*nprepared)++;
3652  else
3653  {
3654  (*nbackends)++;
3655  if ((statusFlags & PROC_IS_AUTOVACUUM) &&
3656  nautovacs < MAXAUTOVACPIDS)
3657  autovac_pids[nautovacs++] = proc->pid;
3658  }
3659  }
3660 
3661  LWLockRelease(ProcArrayLock);
3662 
3663  if (!found)
3664  return false; /* no conflicting backends, so done */
3665 
3666  /*
3667  * Send SIGTERM to any conflicting autovacuums before sleeping. We
3668  * postpone this step until after the loop because we don't want to
3669  * hold ProcArrayLock while issuing kill(). We have no idea what might
3670  * block kill() inside the kernel...
3671  */
3672  for (index = 0; index < nautovacs; index++)
3673  (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3674 
3675  /* sleep, then try again */
3676  pg_usleep(100 * 1000L); /* 100ms */
3677  }
3678 
3679  return true; /* timed out, still conflicts */
3680 }
3681 
3682 /*
3683  * Terminate existing connections to the specified database. This routine
3684  * is used by the DROP DATABASE command when user has asked to forcefully
3685  * drop the database.
3686  *
3687  * The current backend is always ignored; it is caller's responsibility to
3688  * check whether the current backend uses the given DB, if it's important.
3689  *
3690  * It doesn't allow to terminate the connections even if there is a one
3691  * backend with the prepared transaction in the target database.
3692  */
3693 void
3695 {
3696  ProcArrayStruct *arrayP = procArray;
3697  List *pids = NIL;
3698  int nprepared = 0;
3699  int i;
3700 
3701  LWLockAcquire(ProcArrayLock, LW_SHARED);
3702 
3703  for (i = 0; i < procArray->numProcs; i++)
3704  {
3705  int pgprocno = arrayP->pgprocnos[i];
3706  PGPROC *proc = &allProcs[pgprocno];
3707 
3708  if (proc->databaseId != databaseId)
3709  continue;
3710  if (proc == MyProc)
3711  continue;
3712 
3713  if (proc->pid != 0)
3714  pids = lappend_int(pids, proc->pid);
3715  else
3716  nprepared++;
3717  }
3718 
3719  LWLockRelease(ProcArrayLock);
3720 
3721  if (nprepared > 0)
3722  ereport(ERROR,
3723  (errcode(ERRCODE_OBJECT_IN_USE),
3724  errmsg("database \"%s\" is being used by prepared transactions",
3725  get_database_name(databaseId)),
3726  errdetail_plural("There is %d prepared transaction using the database.",
3727  "There are %d prepared transactions using the database.",
3728  nprepared,
3729  nprepared)));
3730 
3731  if (pids)
3732  {
3733  ListCell *lc;
3734 
3735  /*
3736  * Check whether we have the necessary rights to terminate other
3737  * sessions. We don't terminate any session until we ensure that we
3738  * have rights on all the sessions to be terminated. These checks are
3739  * the same as we do in pg_terminate_backend.
3740  *
3741  * In this case we don't raise some warnings - like "PID %d is not a
3742  * PostgreSQL server process", because for us already finished session
3743  * is not a problem.
3744  */
3745  foreach(lc, pids)
3746  {
3747  int pid = lfirst_int(lc);
3748  PGPROC *proc = BackendPidGetProc(pid);
3749 
3750  if (proc != NULL)
3751  {
3752  /* Only allow superusers to signal superuser-owned backends. */
3753  if (superuser_arg(proc->roleId) && !superuser())
3754  ereport(ERROR,
3755  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3756  errmsg("must be a superuser to terminate superuser process")));
3757 
3758  /* Users can signal backends they have role membership in. */
3759  if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3760  !has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND))
3761  ereport(ERROR,
3762  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3763  errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
3764  }
3765  }
3766 
3767  /*
3768  * There's a race condition here: once we release the ProcArrayLock,
3769  * it's possible for the session to exit before we issue kill. That
3770  * race condition possibility seems too unlikely to worry about. See
3771  * pg_signal_backend.
3772  */
3773  foreach(lc, pids)
3774  {
3775  int pid = lfirst_int(lc);
3776  PGPROC *proc = BackendPidGetProc(pid);
3777 
3778  if (proc != NULL)
3779  {
3780  /*
3781  * If we have setsid(), signal the backend's whole process
3782  * group
3783  */
3784 #ifdef HAVE_SETSID
3785  (void) kill(-pid, SIGTERM);
3786 #else
3787  (void) kill(pid, SIGTERM);
3788 #endif
3789  }
3790  }
3791  }
3792 }
3793 
3794 /*
3795  * ProcArraySetReplicationSlotXmin
3796  *
3797  * Install limits to future computations of the xmin horizon to prevent vacuum
3798  * and HOT pruning from removing affected rows still needed by clients with
3799  * replication slots.
3800  */
3801 void
3803  bool already_locked)
3804 {
3805  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3806 
3807  if (!already_locked)
3808  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3809 
3810  procArray->replication_slot_xmin = xmin;
3811  procArray->replication_slot_catalog_xmin = catalog_xmin;
3812 
3813  if (!already_locked)
3814  LWLockRelease(ProcArrayLock);
3815 }
3816 
3817 /*
3818  * ProcArrayGetReplicationSlotXmin
3819  *
3820  * Return the current slot xmin limits. That's useful to be able to remove
3821  * data that's older than those limits.
3822  */
3823 void
3825  TransactionId *catalog_xmin)
3826 {
3827  LWLockAcquire(ProcArrayLock, LW_SHARED);
3828 
3829  if (xmin != NULL)
3830  *xmin = procArray->replication_slot_xmin;
3831 
3832  if (catalog_xmin != NULL)
3833  *catalog_xmin = procArray->replication_slot_catalog_xmin;
3834 
3835  LWLockRelease(ProcArrayLock);
3836 }
3837 
3838 /*
3839  * XidCacheRemoveRunningXids
3840  *
3841  * Remove a bunch of TransactionIds from the list of known-running
3842  * subtransactions for my backend. Both the specified xid and those in
3843  * the xids[] array (of length nxids) are removed from the subxids cache.
3844  * latestXid must be the latest XID among the group.
3845  */
3846 void
3848  int nxids, const TransactionId *xids,
3849  TransactionId latestXid)
3850 {
3851  int i,
3852  j;
3853  XidCacheStatus *mysubxidstat;
3854 
3856 
3857  /*
3858  * We must hold ProcArrayLock exclusively in order to remove transactions
3859  * from the PGPROC array. (See src/backend/access/transam/README.) It's
3860  * possible this could be relaxed since we know this routine is only used
3861  * to abort subtransactions, but pending closer analysis we'd best be
3862  * conservative.
3863  *
3864  * Note that we do not have to be careful about memory ordering of our own
3865  * reads wrt. GetNewTransactionId() here - only this process can modify
3866  * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
3867  * careful about our own writes being well ordered.
3868  */
3869  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3870 
3871  mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
3872 
3873  /*
3874  * Under normal circumstances xid and xids[] will be in increasing order,
3875  * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3876  * behavior when removing a lot of xids.
3877  */
3878  for (i = nxids - 1; i >= 0; i--)
3879  {
3880  TransactionId anxid = xids[i];
3881 
3882  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3883  {
3884  if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3885  {
3887  pg_write_barrier();
3888  mysubxidstat->count--;
3890  break;
3891  }
3892  }
3893 
3894  /*
3895  * Ordinarily we should have found it, unless the cache has
3896  * overflowed. However it's also possible for this routine to be
3897  * invoked multiple times for the same subtransaction, in case of an
3898  * error during AbortSubTransaction. So instead of Assert, emit a
3899  * debug warning.
3900  */
3901  if (j < 0 && !MyProc->subxidStatus.overflowed)
3902  elog(WARNING, "did not find subXID %u in MyProc", anxid);
3903  }
3904 
3905  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3906  {
3907  if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3908  {
3910  pg_write_barrier();
3911  mysubxidstat->count--;
3913  break;
3914  }
3915  }
3916  /* Ordinarily we should have found it, unless the cache has overflowed */
3917  if (j < 0 && !MyProc->subxidStatus.overflowed)
3918  elog(WARNING, "did not find subXID %u in MyProc", xid);
3919 
3920  /* Also advance global latestCompletedXid while holding the lock */
3921  MaintainLatestCompletedXid(latestXid);
3922 
3923  /* ... and xactCompletionCount */
3925 
3926  LWLockRelease(ProcArrayLock);
3927 }
3928 
#ifdef XIDCACHE_DEBUG

/*
 * Dump the XID-cache hit counters to stderr on a single line, one
 * "label: value" pair per counter.
 */
static void
DisplayXidCache(void)
{
	fprintf(stderr,
			"XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
			xc_by_recent_xmin, xc_by_known_xact, xc_by_my_xact,
			xc_by_latest_xid, xc_by_main_xid, xc_by_child_xid,
			xc_by_known_assigned, xc_no_overflow, xc_slow_answer);
}
#endif							/* XIDCACHE_DEBUG */
3950 
3951 /*
3952  * If rel != NULL, return test state appropriate for relation, otherwise
3953  * return state usable for all relations. The latter may consider XIDs as
3954  * not-yet-visible-to-everyone that a state for a specific relation would
3955  * already consider visible-to-everyone.
3956  *
3957  * This needs to be called while a snapshot is active or registered, otherwise
3958  * there are wraparound and other dangers.
3959  *
3960  * See comment for GlobalVisState for details.
3961  */
3964 {
      /*
       * NOTE(review): this listing skips embedded lines 3962-3963, 3967 and
       * 3997-3998, so the function's name/parameter list, the declaration of
       * "state", and whatever stood just before the return are not shown.
       * "rel" and "state" are used below as already declared.
       */
3965  bool need_shared;
3966  bool need_catalog;
3968 
3969  /* XXX: we should assert that a snapshot is pushed or registered */
3970  Assert(RecentXmin);
3971 
      /* No relation given: set both flags, which selects the shared state below. */
3972  if (!rel)
3973  need_shared = need_catalog = true;
3974  else
3975  {
3976  /*
3977  * Other kinds currently don't contain xids, nor always the necessary
3978  * logical decoding markers.
3979  */
3980  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
3981  rel->rd_rel->relkind == RELKIND_MATVIEW ||
3982  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
3983 
      /* While RecoveryInProgress() is true, every relation uses the shared state. */
3984  need_shared = rel->rd_rel->relisshared || RecoveryInProgress();
3985  need_catalog = IsCatalogRelation(rel) || RelationIsAccessibleInLogicalDecoding(rel);
3986  }
3987 
      /*
       * Pick the state object.  The tests are ordered: shared wins over
       * catalog, then RELATION_IS_LOCAL(), then plain data.  With rel == NULL
       * need_shared is true, so the RELATION_IS_LOCAL(rel) test is never
       * reached with a NULL rel.
       */
3988  if (need_shared)
3989  state = &GlobalVisSharedRels;
3990  else if (need_catalog)
3991  state = &GlobalVisCatalogRels;
3992  else if (RELATION_IS_LOCAL(rel))
3993  state = &GlobalVisTempRels;
3994  else
3995  state = &GlobalVisDataRels;
3996 
3999 
      /* The result points at one of the four file-level GlobalVis*Rels objects. */
4000  return state;
4001 }
4002 
4003 /*
4004  * Return true if it's worth updating the accurate maybe_needed boundary.
4005  *
4006  * As it is somewhat expensive to determine xmin horizons, we don't want to
4007  * repeatedly do so when there is a low likelihood of it being beneficial.
4008  *
4009  * The current heuristic is that we update only if RecentXmin has changed
4010  * since the last update. If the oldest currently running transaction has not
4011  * finished, it is unlikely that recomputing the horizon would be useful.
4012  */
4013 static bool
4015 {
      /*
       * NOTE(review): embedded lines 4014, 4017, 4024 and 4029 are missing from
       * this listing.  The function name/parameters, the condition guarding
       * the first "return true", the opening of the boundary comparison and
       * the final return statement are therefore not visible; the returns
       * below must not be read as unconditional.
       */
4016  /* hasn't been updated yet */
4018  return true;
4019 
4020  /*
4021  * If the maybe_needed/definitely_needed boundaries are the same, it's
4022  * unlikely to be beneficial to refresh boundaries.
4023  */
4025  state->definitely_needed))
4026  return false;
4027 
4028  /* does the last snapshot built have a different xmin? */
4030 }
4031 
4032 static void
4034 {
      /*
       * NOTE(review): embedded lines 4033, 4036, 4039, 4042, 4045 and 4064 are
       * missing from this listing.  That hides the function name/parameters,
       * the call that wraps each horizons->*_oldest_nonremovable value before
       * it is stored in maybe_needed, and one trailing statement.
       *
       * Step 1: set maybe_needed of all four GlobalVis*Rels objects from the
       * matching field of "horizons".
       */
4035  GlobalVisSharedRels.maybe_needed =
4037  horizons->shared_oldest_nonremovable);
4038  GlobalVisCatalogRels.maybe_needed =
4040  horizons->catalog_oldest_nonremovable);
4041  GlobalVisDataRels.maybe_needed =
4043  horizons->data_oldest_nonremovable);
4044  GlobalVisTempRels.maybe_needed =
4046  horizons->temp_oldest_nonremovable);
4047 
4048  /*
4049  * In longer running transactions it's possible that transactions we
4050  * previously needed to treat as running aren't around anymore. So update
4051  * definitely_needed to not be earlier than maybe_needed.
4052  */
4053  GlobalVisSharedRels.definitely_needed =
4054  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
4055  GlobalVisSharedRels.definitely_needed);
4056  GlobalVisCatalogRels.definitely_needed =
4057  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
4058  GlobalVisCatalogRels.definitely_needed);
4059  GlobalVisDataRels.definitely_needed =
4060  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
4061  GlobalVisDataRels.definitely_needed);
      /*
       * Unlike the three states above, the temp-relation state does not keep
       * the newer of the two bounds: definitely_needed is simply set equal to
       * maybe_needed.
       */
4062  GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed;
4063 
4065 }
4066 
4067 /*
4068  * Update boundaries in GlobalVis{Shared,Catalog, Data}Rels
4069  * using ComputeXidHorizons().
4070  */
4071 static void
4073 {
      /*
       * NOTE(review): the line with the function name (embedded line 4072) is
       * missing from this listing.
       *
       * "horizons" is only an output buffer for the call below; nothing in
       * this function reads it afterwards.
       */
4074  ComputeXidHorizonsResult horizons;
4075 
4076  /* updates the horizons as a side-effect */
4077  ComputeXidHorizons(&horizons);
4078 }
4079 
4080 /*
4081  * Return true if no snapshot still considers fxid to be running.
4082  *
4083  * The state passed needs to have been initialized for the relation fxid is
4084  * from (NULL is also OK), otherwise the result may not be correct.
4085  *
4086  * See comment for GlobalVisState for details.
4087  */
4088 bool
4090  FullTransactionId fxid)
4091 {
      /*
       * NOTE(review): embedded lines 4089, 4103 and 4115 are missing from this
       * listing: the function name with its first parameter ("state", used
       * below), the condition guarding the first "return false", and one
       * statement after GlobalVisUpdate().
       *
       * Three cases follow: below maybe_needed (removable), the
       * definitely_needed case described by the second comment (not
       * removable), and the range in between, where the bounds may be
       * refreshed once and rechecked.
       */
4092  /*
4093  * If fxid is older than maybe_needed bound, it definitely is visible to
4094  * everyone.
4095  */
4096  if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
4097  return true;
4098 
4099  /*
4100  * If fxid is >= definitely_needed bound, it is very likely to still be
4101  * considered running.
4102  */
4104  return false;
4105 
4106  /*
4107  * fxid is between maybe_needed and definitely_needed, i.e. there might or
4108  * might not exist a snapshot considering fxid running. If it makes sense,
4109  * update boundaries and recheck.
4110  */
4111  if (GlobalVisTestShouldUpdate(state))
4112  {
4113  GlobalVisUpdate();
4114 
4116 
      /* Same test as the first one, now against the refreshed bound. */
4117  return FullTransactionIdPrecedes(fxid, state->maybe_needed);
4118  }
4119  else
4120  return false;
4121 }
4122 
4123 /*
4124  * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
4125  *
4126  * It is crucial that this only gets called for xids from a source that
4127  * protects against xid wraparounds (e.g. from a table and thus protected by
4128  * relfrozenxid).
4129  */
4130 bool
4132 {
      /*
       * NOTE(review): the line with the function name and parameters (embedded
       * line 4131) is missing from this listing; "state" and "xid" are used
       * below as parameters.
       */
4133  FullTransactionId fxid;
4134 
4135  /*
4136  * Convert 32 bit argument to FullTransactionId. We can do so safely
4137  * because we know the xid has to, at the very least, be between
4138  * [oldestXid, nextFullXid), i.e. within 2 billion of xid. To avoid taking
4139  * a lock to determine either, we can just compare with
4140  * state->definitely_needed, which was based on those value at the time
4141  * the current snapshot was built.
4142  */
4143  fxid = FullXidRelativeTo(state->definitely_needed, xid);
4144 
      /* All real work is delegated to the 64-bit variant. */
4145  return GlobalVisTestIsRemovableFullXid(state, fxid);
4146 }
4147 
4148 /*
4149  * Return FullTransactionId below which all transactions are not considered
4150  * running anymore.
4151  *
4152  * Note: This is less efficient than testing with
4153  * GlobalVisTestIsRemovableFullXid as it likely requires building an accurate
4154  * cutoff, even in the case all the XIDs compared with the cutoff are outside
4155  * [maybe_needed, definitely_needed).
4156  */
4159 {
      /*
       * NOTE(review): the return type and name/parameter lines (embedded lines
       * 4157-4158) are missing from this listing; "state" is used below as a
       * parameter.
       */
4160  /* acquire accurate horizon if not already done */
4161  if (GlobalVisTestShouldUpdate(state))
4162  GlobalVisUpdate();
4163 
      /* The cutoff returned is the maybe_needed bound, refreshed or not. */
4164  return state->maybe_needed;
4165 }
4166 
4167 /* Convenience wrapper around GlobalVisTestNonRemovableFullHorizon */
4170 {
      /*
       * NOTE(review): the return type and name/parameter lines (embedded lines
       * 4168-4169) are missing from this listing; "state" is used below as a
       * parameter.
       */
4171  FullTransactionId cutoff;
4172 
4173  cutoff = GlobalVisTestNonRemovableFullHorizon(state);
4174 
      /* Return the cutoff converted with XidFromFullTransactionId(). */
4175  return XidFromFullTransactionId(cutoff);
4176 }
4177 
4178 /*
4179  * Convenience wrapper around GlobalVisTestFor() and
4180  * GlobalVisTestIsRemovableFullXid(), see their comments.
4181  */
4182 bool
4184 {
      /*
       * NOTE(review): embedded lines 4183 and 4185 are missing from this
       * listing: the function name/parameters ("rel" and "fxid" are used
       * below) and the declaration of "state".
       */
4186 
      /* Look up the state for rel, then run the test against it. */
4187  state = GlobalVisTestFor(rel);
4188 
4189  return GlobalVisTestIsRemovableFullXid(state, fxid);
4190 }
4191 
4192 /*
4193  * Convenience wrapper around GlobalVisTestFor() and
4194  * GlobalVisTestIsRemovableXid(), see their comments.
4195  */
4196 bool
4198 {
      /*
       * NOTE(review): embedded lines 4197 and 4199 are missing from this
       * listing: the function name/parameters ("rel" and "xid" are used below)
       * and the declaration of "state".
       */
4200 
      /* Look up the state for rel, then run the 32-bit test against it. */
4201  state = GlobalVisTestFor(rel);
4202 
4203  return GlobalVisTestIsRemovableXid(state, xid);
4204 }
4205 
4206 /*
4207  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
4208  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
4209  *
4210  * Be very careful about when to use this function. It can only safely be used
4211  * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
4212  * rel. That e.g. can be guaranteed if the caller assures a snapshot is
4213  * held by the backend and xid is from a table (where vacuum/freezing ensures
4214  * the xid has to be within that range), or if xid is from the procarray and
4215  * prevents xid wraparound that way.
4216  */
4217 static inline FullTransactionId
4219 {
      /*
       * NOTE(review): embedded lines 4218, 4222, 4226 and 4228 are missing from
       * this listing: the function name/parameters ("rel" and "xid" are used
       * below), one statement before the first Assert, the check announced by
       * the "not guaranteed to find issues" comment, and the start of the
       * return expression.
       */
4220  TransactionId rel_xid = XidFromFullTransactionId(rel);
4221 
4223  Assert(TransactionIdIsValid(rel_xid));
4224 
4225  /* not guaranteed to find issues, but likely to catch mistakes */
4227 
      /*
       * (xid - rel_xid) is cast to int32, so the offset added here is signed:
       * an xid on either side of rel_xid can be expressed.  Presumably
       * TransactionId is an unsigned 32-bit type -- TODO confirm.
       */
4229  + (int32) (xid - rel_xid));
4230 }
4231 
4232 
4233 /* ----------------------------------------------
4234  * KnownAssignedTransactionIds sub-module
4235  * ----------------------------------------------
4236  */
4237 
4238 /*
4239  * In Hot Standby mode, we maintain a list of transactions that are (or were)
4240  * running on the primary at the current point in WAL. These XIDs must be
4241  * treated as running by standby transactions, even though they are not in
4242  * the standby server's PGPROC array.
4243  *
4244  * We record all XIDs that we know have been assigned. That includes all the
4245  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
4246  * been assigned. We can deduce the existence of unobserved XIDs because we
4247  * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
4248  * list expands as new XIDs are observed or inferred, and contracts when
4249  * transaction completion records arrive.
4250  *
4251  * During hot standby we do not fret too much about the distinction between
4252  * top-level XIDs and subtransaction XIDs. We store both together in the
4253  * KnownAssignedXids list. In backends, this is copied into snapshots in
4254  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
4255  * doesn't care about the distinction either. Subtransaction XIDs are
4256  * effectively treated as top-level XIDs and in the typical case pg_subtrans
4257  * links are *not* maintained (which does not affect visibility).
4258  *
4259  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
4260  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
4261  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
4262  * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these
4263  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
4264  * and then remove them from KnownAssignedXids. This prevents overflow of
4265  * KnownAssignedXids and snapshots, at the cost that status checks for these
4266  * subXIDs will take a slower path through TransactionIdIsInProgress().
4267  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
4268  * though it should be complete for top-level XIDs; this is the same situation
4269  * that holds with respect to the PGPROC entries in normal running.
4270  *
4271  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
4272  * that, similarly to tracking overflow of a PGPROC's subxids array. We do
4273  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
4274  * As long as that is within the range of interesting XIDs, we have to assume
4275  * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
4276  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
4277  * subXID arrives - that is not an error.)
4278  *
4279  * Should a backend on primary somehow disappear before it can write an abort
4280  * record, then we just leave those XIDs in KnownAssignedXids. They actually
4281  * aborted but we think they were running; the distinction is irrelevant
4282  * because either way any changes done by the transaction are not visible to
4283  * backends in the standby. We prune KnownAssignedXids when
4284  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
4285  * array due to such dead XIDs.
4286  */
4287 
4288 /*
4289  * RecordKnownAssignedTransactionIds
4290  * Record the given XID in KnownAssignedXids, as well as any preceding
4291  * unobserved XIDs.
4292  *
4293  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
4294  * associated with a transaction. Must be called for each record after we
4295  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.
4296  *
4297  * Called during recovery in analogy with and in place of GetNewTransactionId()
4298  */
4299 void
4301 {
      /*
       * NOTE(review): embedded lines 4300, 4302-4304, 4314, 4339 and 4358 are
       * missing from this listing: the function name/parameters ("xid" is
       * used below), the statements at the top of the body, the condition
       * opening the main block, the condition guarding the early return, and
       * the statement announced by the "nextXid must be beyond" comment.
       */
4305 
4306  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
4307  xid, latestObservedXid);
4308 
4309  /*
4310  * When a newly observed xid arrives, it is frequently the case that it is
4311  * *not* the next xid in sequence. When this occurs, we must treat the
4312  * intervening xids as running also.
4313  */
4315  {
4316  TransactionId next_expected_xid;
4317 
4318  /*
4319  * Extend subtrans like we do in GetNewTransactionId() during normal
4320  * operation using individual extend steps. Note that we do not need
4321  * to extend clog since its extensions are WAL logged.
4322  *
4323  * This part has to be done regardless of standbyState since we
4324  * immediately start assigning subtransactions to their toplevel
4325  * transactions.
4326  */
      /* Walk from latestObservedXid + 1 up to xid, one ExtendSUBTRANS() call per xid. */
4327  next_expected_xid = latestObservedXid;
4328  while (TransactionIdPrecedes(next_expected_xid, xid))
4329  {
4330  TransactionIdAdvance(next_expected_xid);
4331  ExtendSUBTRANS(next_expected_xid);
4332  }
4333  Assert(next_expected_xid == xid);
4334 
4335  /*
4336  * If the KnownAssignedXids machinery isn't up yet, there's nothing
4337  * more to do since we don't track assigned xids yet.
4338  */
4340  {
      /* latestObservedXid is still advanced on this early-exit path. */
4341  latestObservedXid = xid;
4342  return;
4343  }
4344 
4345  /*
4346  * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
4347  */
4348  next_expected_xid = latestObservedXid;
4349  TransactionIdAdvance(next_expected_xid);
      /*
       * The third argument is KnownAssignedXidsAdd()'s exclusive_lock flag.
       * Passing false means no ProcArrayLock is held here, so the callee does
       * its own interlocking against concurrent readers.
       */
4350  KnownAssignedXidsAdd(next_expected_xid, xid, false);
4351 
4352  /*
4353  * Now we can advance latestObservedXid
4354  */
4355  latestObservedXid = xid;
4356 
4357  /* ShmemVariableCache->nextXid must be beyond any observed xid */
      /*
       * NOTE(review): next_expected_xid is recomputed below, but no later
       * statement visible in this listing reads it before the block ends.
       */
4359  next_expected_xid = latestObservedXid;
4360  TransactionIdAdvance(next_expected_xid);
4361  }
4362 }
4363 
4364 /*
4365  * ExpireTreeKnownAssignedTransactionIds
4366  * Remove the given XIDs from KnownAssignedXids.
4367  *
4368  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
4369  */
4370 void
4372  TransactionId *subxids, TransactionId max_xid)
4373 {
      /*
       * NOTE(review): embedded lines 4371, 4374, 4384 and 4387 are missing from
       * this listing: the function name with its leading parameters ("xid"
       * and "nsubxids" are used below), the first statement of the body, and
       * the two statements announced by the latestCompletedXid and
       * xactCompletionCount comments.  "max_xid" is not referenced in any
       * visible line.
       */
4375 
4376  /*
4377  * Uses same locking as transaction commit
4378  */
4379  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4380 
      /* Drop the top-level xid (if valid) and all listed subxids from the array. */
4381  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
4382 
4383  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
4385 
4386  /* ... and xactCompletionCount */
4388 
4389  LWLockRelease(ProcArrayLock);
4390 }
4391 
4392 /*
4393  * ExpireAllKnownAssignedTransactionIds
4394  * Remove all entries in KnownAssignedXids
4395  */
4396 void
4398 {
      /*
       * NOTE(review): embedded lines 4397 and 4400 are missing from this
       * listing: the function name/parameters and the statement that runs
       * while ProcArrayLock is held exclusively.
       */
4399  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4401  LWLockRelease(ProcArrayLock);
4402 }
4403 
4404 /*
4405  * ExpireOldKnownAssignedTransactionIds
4406  * Remove KnownAssignedXids entries preceding the given XID
4407  */
4408 void
4410 {
      /*
       * NOTE(review): embedded lines 4409 and 4412 are missing from this
       * listing: the function name/parameters and the statement that runs
       * while ProcArrayLock is held exclusively.
       */
4411  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4413  LWLockRelease(ProcArrayLock);
4414 }
4415 
4416 
4417 /*
4418  * Private module functions to manipulate KnownAssignedXids
4419  *
4420  * There are 5 main uses of the KnownAssignedXids data structure:
4421  *
4422  * * backends taking snapshots - all valid XIDs need to be copied out
4423  * * backends seeking to determine presence of a specific XID
4424  * * startup process adding new known-assigned XIDs
4425  * * startup process removing specific XIDs as transactions end
4426  * * startup process pruning array when special WAL records arrive
4427  *
4428  * This data structure is known to be a hot spot during Hot Standby, so we
4429  * go to some lengths to make these operations as efficient and as concurrent
4430  * as possible.
4431  *
4432  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
4433  * order, to be exact --- to allow binary search for specific XIDs. Note:
4434  * in general TransactionIdPrecedes would not provide a total order, but
4435  * we know that the entries present at any instant should not extend across
4436  * a large enough fraction of XID space to wrap around (the primary would
4437  * shut down for fear of XID wrap long before that happens). So it's OK to
4438  * use TransactionIdPrecedes as a binary-search comparator.
4439  *
4440  * It's cheap to maintain the sortedness during insertions, since new known
4441  * XIDs are always reported in XID order; we just append them at the right.
4442  *
4443  * To keep individual deletions cheap, we need to allow gaps in the array.
4444  * This is implemented by marking array elements as valid or invalid using
4445  * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
4446  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
4447  * XID entry itself. This preserves the property that the XID entries are
4448  * sorted, so we can do binary searches easily. Periodically we compress
4449  * out the unused entries; that's much cheaper than having to compress the
4450  * array immediately on every deletion.
4451  *
4452  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
4453  * are those with indexes tail <= i < head; items outside this subscript range
4454  * have unspecified contents. When head reaches the end of the array, we
4455  * force compression of unused entries rather than wrapping around, since
4456  * allowing wraparound would greatly complicate the search logic. We maintain
4457  * an explicit tail pointer so that pruning of old XIDs can be done without
4458  * immediately moving the array contents. In most cases only a small fraction
4459  * of the array contains valid entries at any instant.
4460  *
4461  * Although only the startup process can ever change the KnownAssignedXids
4462  * data structure, we still need interlocking so that standby backends will
4463  * not observe invalid intermediate states. The convention is that backends
4464  * must hold shared ProcArrayLock to examine the array. To remove XIDs from
4465  * the array, the startup process must hold ProcArrayLock exclusively, for
4466  * the usual transactional reasons (compare commit/abort of a transaction
4467  * during normal running). Compressing unused entries out of the array
4468  * likewise requires exclusive lock. To add XIDs to the array, we just insert
4469  * them into slots to the right of the head pointer and then advance the head
4470  * pointer. This wouldn't require any lock at all, except that on machines
4471  * with weak memory ordering we need to be careful that other processors
4472  * see the array element changes before they see the head pointer change.
4473  * We handle this by using a spinlock to protect reads and writes of the
4474  * head/tail pointers. (We could dispense with the spinlock if we were to
4475  * create suitable memory access barrier primitives and use those instead.)
4476  * The spinlock must be taken to read or write the head/tail pointers unless
4477  * the caller holds ProcArrayLock exclusively.
4478  *
4479  * Algorithmic analysis:
4480  *
4481  * If we have a maximum of M slots, with N XIDs currently spread across
4482  * S elements then we have N <= S <= M always.
4483  *
4484  * * Adding a new XID is O(1) and needs little locking (unless compression
4485  * must happen)
4486  * * Compressing the array is O(S) and requires exclusive lock
4487  * * Removing an XID is O(logS) and requires exclusive lock
4488  * * Taking a snapshot is O(S) and requires shared lock
4489  * * Checking for an XID is O(logS) and requires shared lock
4490  *
4491  * In comparison, using a hash table for KnownAssignedXids would mean that
4492  * taking snapshots would be O(M). If we can maintain S << M then the
4493  * sorted array technique will deliver significantly faster snapshots.
4494  * If we try to keep S too small then we will spend too much time compressing,
4495  * so there is an optimal point for any workload mix. We use a heuristic to
4496  * decide when to compress the array, though trimming also helps reduce
4497  * frequency of compressing. The heuristic requires us to track the number of
4498  * currently valid XIDs in the array.
4499  */
4500 
4501 
4502 /*
4503  * Compress KnownAssignedXids by shifting valid data down to the start of the
4504  * array, removing any gaps.
4505  *
4506  * A compression step is forced if "force" is true, otherwise we do it
4507  * only if a heuristic indicates it's a good time to do it.
4508  *
4509  * Caller must hold ProcArrayLock in exclusive mode.
4510  */
4511 static void
4513 {
      /*
       * NOTE(review): the line with the function name and parameters (embedded
       * line 4512) is missing from this listing; "force" is used below as a
       * parameter.
       */
4514  ProcArrayStruct *pArray = procArray;
4515  int head,
4516  tail;
4517  int compress_index;
4518  int i;
4519 
4520  /* no spinlock required since we hold ProcArrayLock exclusively */
4521  head = pArray->headKnownAssignedXids;
4522  tail = pArray->tailKnownAssignedXids;
4523 
4524  if (!force)
4525  {
4526  /*
4527  * If we can choose how much to compress, use a heuristic to avoid
4528  * compressing too often or not often enough.
4529  *
4530  * Heuristic is if we have a large enough current spread and less than
4531  * 50% of the elements are currently in use, then compress. This
4532  * should ensure we compress fairly infrequently. We could compress
4533  * less often though the virtual array would spread out more and
4534  * snapshots would become more expensive.
4535  */
4536  int nelements = head - tail;
4537 
      /*
       * Skip compression when the used span is shorter than
       * 4 * PROCARRAY_MAXPROCS slots, or when it is shorter than twice the
       * number of valid entries.
       */
4538  if (nelements < 4 * PROCARRAY_MAXPROCS ||
4539  nelements < 2 * pArray->numKnownAssignedXids)
4540  return;
4541  }
4542 
4543  /*
4544  * We compress the array by reading the valid values from tail to head,
4545  * re-aligning data to 0th element.
4546  */
      /*
       * compress_index starts at 0 and advances at most once per step of i,
       * so it never exceeds i; the in-place copy cannot overwrite an entry
       * that has not been read yet.
       */
4547  compress_index = 0;
4548  for (i = tail; i < head; i++)
4549  {
4550  if (KnownAssignedXidsValid[i])
4551  {
4552  KnownAssignedXids[compress_index] = KnownAssignedXids[i];
4553  KnownAssignedXidsValid[compress_index] = true;
4554  compress_index++;
4555  }
4556  }
4557 
      /*
       * Only the two pointers change; numKnownAssignedXids is not modified
       * here.  Slots at or beyond the new head are left as they were.
       */
4558  pArray->tailKnownAssignedXids = 0;
4559  pArray->headKnownAssignedXids = compress_index;
4560 }
4561 
4562 /*
4563  * Add xids into KnownAssignedXids at the head of the array.
4564  *
4565  * xids from from_xid to to_xid, inclusive, are added to the array.
4566  *
4567  * If exclusive_lock is true then caller already holds ProcArrayLock in
4568  * exclusive mode, so we need no extra locking here. Else caller holds no
4569  * lock, so we need to be sure we maintain sufficient interlocks against
4570  * concurrent readers. (Only the startup process ever calls this, so no need
4571  * to worry about concurrent writers.)
4572  */
4573 static void
4575  bool exclusive_lock)
4576 {
      /*
       * NOTE(review): embedded lines 4574, 4622, 4635, 4676 and 4678 are
       * missing from this listing: the function name with its leading
       * parameters ("from_xid" and "to_xid" are used below), one statement
       * before the out-of-order elog, the statement that runs between
       * acquiring and releasing ProcArrayLock, and the two statements that
       * bracket the head-pointer store in the final else branch.
       */
4577  ProcArrayStruct *pArray = procArray;
4578  TransactionId next_xid;
4579  int head,
4580  tail;
4581  int nxids;
4582  int i;
4583 
4584  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
4585 
4586  /*
4587  * Calculate how many array slots we'll need. Normally this is cheap; in
4588  * the unusual case where the XIDs cross the wrap point, we do it the hard
4589  * way.
4590  */
      /*
       * This is a plain numeric comparison, not TransactionIdPrecedes().
       * When it fails, the range is counted by stepping with
       * TransactionIdAdvance().
       */
4591  if (to_xid >= from_xid)
4592  nxids = to_xid - from_xid + 1;
4593  else
4594  {
4595  nxids = 1;
4596  next_xid = from_xid;
4597  while (TransactionIdPrecedes(next_xid, to_xid))
4598  {
4599  nxids++;
4600  TransactionIdAdvance(next_xid);
4601  }
4602  }
4603 
4604  /*
4605  * Since only the startup process modifies the head/tail pointers, we
4606  * don't need a lock to read them here.
4607  */
4608  head = pArray->headKnownAssignedXids;
4609  tail = pArray->tailKnownAssignedXids;
4610 
      /* head may equal maxKnownAssignedXids (array full to the end); tail may not. */
4611  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
4612  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
4613 
4614  /*
4615  * Verify that insertions occur in TransactionId sequence. Note that even
4616  * if the last existing element is marked invalid, it must still have a
4617  * correctly sequenced XID value.
4618  */
4619  if (head > tail &&
4620  TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
4621  {
4623  elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
4624  }
4625 
4626  /*
4627  * If our xids won't fit in the remaining space, compress out free space
4628  */
4629  if (head + nxids > pArray->maxKnownAssignedXids)
4630  {
4631  /* must hold lock to compress */
4632  if (!exclusive_lock)
4633  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4634 
4636 
      /* Re-read head: the local copy taken earlier may be stale by now. */
4637  head = pArray->headKnownAssignedXids;
4638  /* note: we no longer care about the tail pointer */
4639 
4640  if (!exclusive_lock)
4641  LWLockRelease(ProcArrayLock);
4642 
4643  /*
4644  * If it still won't fit then we're out of memory
4645  */
4646  if (head + nxids > pArray->maxKnownAssignedXids)
4647  elog(ERROR, "too many KnownAssignedXids");
4648  }
4649 
4650  /* Now we can insert the xids into the space starting at head */
      /* Only the local "head" advances in this loop; the shared pointer is stored last. */
4651  next_xid = from_xid;
4652  for (i = 0; i < nxids; i++)
4653  {
4654  KnownAssignedXids[head] = next_xid;
4655  KnownAssignedXidsValid[head] = true;
4656  TransactionIdAdvance(next_xid);
4657  head++;
4658  }
4659 
4660  /* Adjust count of number of valid entries */
4661  pArray->numKnownAssignedXids += nxids;
4662 
4663  /*
4664  * Now update the head pointer. We use a spinlock to protect this
4665  * pointer, not because the update is likely to be non-atomic, but to
4666  * ensure that other processors see the above array updates before they
4667  * see the head pointer change.
4668  *
4669  * If we're holding ProcArrayLock exclusively, there's no need to take the
4670  * spinlock.
4671  */
4672  if (exclusive_lock)
4673  pArray->headKnownAssignedXids = head;
4674  else
4675  {
4677  pArray->headKnownAssignedXids = head;
4679  }
4680 }
4681 
4682 /*
4683  * KnownAssignedXidsSearch
4684  *
4685  * Searches KnownAssignedXids for a specific xid and optionally removes it.
4686  * Returns true if it was found, false if not.
4687  *
4688  * Caller must hold ProcArrayLock in shared or exclusive mode.
4689  * Exclusive lock must be held for remove = true.
4690  */
4691 static bool
4693 {
      /*
       * NOTE(review): embedded lines 4692, 4710 and 4713 are missing from this
       * listing: the function name/parameters ("xid" and "remove" are used
       * below) and the two statements that bracket the head/tail reads in
       * the non-remove branch.
       */
4694  ProcArrayStruct *pArray = procArray;
4695  int first,
4696  last;
4697  int head;
4698  int tail;
      /* -1 means "not found"; a hit stores the matching array index. */
4699  int result_index = -1;
4700 
4701  if (remove)
4702  {
4703  /* we hold ProcArrayLock exclusively, so no need for spinlock */
4704  tail = pArray->tailKnownAssignedXids;
4705  head = pArray->headKnownAssignedXids;
4706  }
4707  else
4708  {
4709  /* take spinlock to ensure we see up-to-date array contents */
4711  tail = pArray->tailKnownAssignedXids;
4712  head = pArray->headKnownAssignedXids;
4714  }
4715 
4716  /*
4717  * Standard binary search. Note we can ignore the KnownAssignedXidsValid
4718  * array here, since even invalid entries will contain sorted XIDs.
4719  */
      /* Search the closed index range [tail, head - 1]; an empty array skips the loop. */
4720  first = tail;
4721  last = head - 1;
4722  while (first <= last)
4723  {
4724  int mid_index;
4725  TransactionId mid_xid;
4726 
4727  mid_index = (first + last) / 2;
4728  mid_xid = KnownAssignedXids[mid_index];
4729 
4730  if (xid == mid_xid)
4731  {
4732  result_index = mid_index;
4733  break;
4734  }
4735  else if (TransactionIdPrecedes(xid, mid_xid))
4736  last = mid_index - 1;
4737  else
4738  first = mid_index + 1;
4739  }
4740 
4741  if (result_index < 0)
4742  return false; /* not in array */
4743 
4744  if (!KnownAssignedXidsValid[result_index])
4745  return false; /* in array, but invalid */
4746 
4747  if (remove)
4748  {
      /* Removal only clears the validity flag; the XID value stays in its slot. */
4749  KnownAssignedXidsValid[result_index] = false;
4750 
4751  pArray->numKnownAssignedXids--;
4752  Assert(pArray->numKnownAssignedXids >= 0);
4753 
4754  /*
4755  * If we're removing the tail element then advance tail pointer over
4756  * any invalid elements. This will speed future searches.
4757  */
4758  if (result_index == tail)
4759  {
4760  tail++;
4761  while (tail < head && !KnownAssignedXidsValid[tail])
4762  tail++;
4763  if (tail >= head)
4764  {
4765  /* Array is empty, so we can reset both pointers */
4766  pArray->headKnownAssignedXids = 0;
4767  pArray->tailKnownAssignedXids = 0;
4768  }
4769  else
4770  {
4771  pArray->tailKnownAssignedXids = tail;
4772  }
4773  }
4774  }
4775 
      /* Reached only when xid was found and its slot was valid on entry. */
4776  return true;
4777 }
4778 
4779 /*
4780  * Is the specified XID present in KnownAssignedXids[]?
4781  *
4782  * Caller must hold ProcArrayLock in shared or exclusive mode.
4783  */
4784 static bool
4786 {
      /*
       * NOTE(review): embedded lines 4785 and 4787 are missing from this
       * listing: the function name/parameters ("xid" is used below) and the
       * first statement of the body.
       */
4788 
      /* remove = false: a pure lookup that leaves the array unchanged. */
4789  return KnownAssignedXidsSearch(xid, false);
4790 }
4791 
4792 /*
4793  * Remove the specified XID from KnownAssignedXids[].
4794  *
4795  * Caller must hold ProcArrayLock in exclusive mode.
4796  */
4797 static void
4799 {
      /*
       * NOTE(review): embedded lines 4798 and 4800 are missing from this
       * listing: the function name/parameters ("xid" is used below) and the
       * first statement of the body.
       */
4801 
4802  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
4803 
4804  /*
4805  * Note: we cannot consider it an error to remove an XID that's not
4806  * present. We intentionally remove subxact IDs while processing
4807  * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
4808  * removed again when the top-level xact commits or aborts.
4809  *
4810  * It might be possible to track such XIDs to distinguish this case from
4811  * actual errors, but it would be complicated and probably not worth it.
4812  * So, just ignore the search result.
4813  */
      /* The (void) cast marks the found/not-found result as deliberately discarded. */
4814  (void) KnownAssignedXidsSearch(xid, true);
4815 }
4816 
4817 /*
4818  * KnownAssignedXidsRemoveTree
4819  * Remove xid (if it's not InvalidTransactionId) and all the subxids.
4820  *
4821  * Caller must hold ProcArrayLock in exclusive mode.
4822  */
4823 static void
4825  TransactionId *subxids)
4826 {
      /*
       * NOTE(review): embedded lines 4824, 4830 and 4836 are missing from this
       * listing: the function name with its leading parameters ("xid" and
       * "nsubxids" are used below), the statement controlled by the
       * TransactionIdIsValid(xid) test, and the statement announced by the
       * "Opportunistically compress" comment.  As printed, the "if" would
       * appear to control the "for" loop; that is an artifact of the missing
       * line.
       */
4827  int i;
4828 
4829  if (TransactionIdIsValid(xid))
4831 
      /* Remove each listed subxid; an entry that is absent is not an error. */
4832  for (i = 0; i < nsubxids; i++)
4833  KnownAssignedXidsRemove(subxids[i]);
4834 
4835  /* Opportunistically compress the array */
4837 }
4838 
4839 /*
4840  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
4841  * then clear the whole table.
4842  *
4843  * Caller must hold ProcArrayLock in exclusive mode.
4844  */
4845 static void
4847 {
      /*
       * NOTE(review): embedded lines 4846 and 4911 are missing from this
       * listing: the function name/parameters ("removeXid" is used below) and
       * the statement announced by the final "Opportunistically compress"
       * comment.
       */
4848  ProcArrayStruct *pArray = procArray;
4849  int count = 0;
4850  int head,
4851  tail,
4852  i;
4853 
      /* Invalid removeXid: empty the table by zeroing the count and both pointers. */
4854  if (!TransactionIdIsValid(removeXid))
4855  {
4856  elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
4857  pArray->numKnownAssignedXids = 0;
4858  pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
4859  return;
4860  }
4861 
4862  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
4863 
4864  /*
4865  * Mark entries invalid starting at the tail. Since array is sorted, we
4866  * can stop as soon as we reach an entry >= removeXid.
4867  */
4868  tail = pArray->tailKnownAssignedXids;
4869  head = pArray->headKnownAssignedXids;
4870 
4871  for (i = tail; i < head; i++)
4872  {
4873  if (KnownAssignedXidsValid[i])
4874  {
4875  TransactionId knownXid = KnownAssignedXids[i];
4876 
4877  if (TransactionIdFollowsOrEquals(knownXid, removeXid))
4878  break;
4879 
      /*
       * Entries for which StandbyTransactionIdIsPrepared() returns true stay
       * valid even though they precede removeXid.
       */
4880  if (!StandbyTransactionIdIsPrepared(knownXid))
4881  {
4882  KnownAssignedXidsValid[i] = false;
4883  count++;
4884  }
4885  }
4886  }
4887 
4888  pArray->numKnownAssignedXids -= count;
4889  Assert(pArray->numKnownAssignedXids >= 0);
4890 
4891  /*
4892  * Advance the tail pointer if we've marked the tail item invalid.
4893  */
      /* Find the first slot in [tail, head) that is still valid. */
4894  for (i = tail; i < head; i++)
4895  {
4896  if (KnownAssignedXidsValid[i])
4897  break;
4898  }
4899  if (i >= head)
4900  {
4901  /* Array is empty, so we can reset both pointers */
4902  pArray->headKnownAssignedXids = 0;
4903  pArray->tailKnownAssignedXids = 0;
4904  }
4905  else
4906  {
4907  pArray->tailKnownAssignedXids = i;
4908  }
4909 
4910  /* Opportunistically compress the array */
4912 }
4913 
4914 /*
4915  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
4916  * We filter out anything >= xmax.
4917  *
4918  * Returns the number of XIDs stored into xarray[]. Caller is responsible
4919  * that array is large enough.
4920  *
4921  * Caller must hold ProcArrayLock in (at least) shared mode.
4922  */
4923 static int
4925 {
      /*
       * NOTE(review): embedded lines 4924 and 4926 are missing from this
       * listing: the function name/parameters ("xarray" and "xmax" are used
       * below) and the declaration (and any initialization) of "xtmp".
       */
4927 
      /* xtmp receives the xmin output; this function does not use it afterwards. */
4928  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
4929 }
4930 
4931 /*
4932  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
4933  * we reduce *xmin to the lowest xid value seen if not already lower.
4934  *
4935  * Caller must hold ProcArrayLock in (at least) shared mode.
4936  */
4937 static int
4939  TransactionId xmax)
4940 {
      /*
       * NOTE(review): embedded lines 4938, 4955 and 4958 are missing from this
       * listing: the function name with its leading parameters ("xarray" and
       * "xmin" are used below) and the two statements that bracket the
       * head/tail reads.
       */
4941  int count = 0;
4942  int head,
4943  tail;
4944  int i;
4945 
4946  /*
4947  * Fetch head just once, since it may change while we loop. We can stop
4948  * once we reach the initially seen head, since we are certain that an xid
4949  * cannot enter and then leave the array while we hold ProcArrayLock. We
4950  * might miss newly-added xids, but they should be >= xmax so irrelevant
4951  * anyway.
4952  *
4953  * Must take spinlock to ensure we see up-to-date array contents.
4954  */
4956  tail = procArray->tailKnownAssignedXids;
4957  head = procArray->headKnownAssignedXids;
4959 
4960  for (i = tail; i < head; i++)
4961  {
4962  /* Skip any gaps in the array */
4963  if (KnownAssignedXidsValid[i])
4964  {
4965  TransactionId knownXid = KnownAssignedXids[i];
4966 
4967  /*
4968  * Update xmin if required. Only the first XID need be checked,
4969  * since the array is sorted.
4970  */
      /*
       * count == 0 identifies the first valid entry seen.  This update runs
       * before the xmax filter below, so it applies to that entry even if the
       * entry is then filtered out.
       */
4971  if (count == 0 &&
4972  TransactionIdPrecedes(knownXid, *xmin))
4973  *xmin = knownXid;
4974 
4975  /*
4976  * Filter out anything >= xmax, again relying on sorted property
4977  * of array.
4978  */
      /* An invalid xmax disables the upper bound: every valid entry is copied. */
4979  if (TransactionIdIsValid(xmax) &&
4980  TransactionIdFollowsOrEquals(knownXid, xmax))
4981  break;
4982 
4983  /* Add knownXid into output array */
4984  xarray[count++] = knownXid;
4985  }
4986  }
4987 
4988  return count;
4989 }
4990 
4991 /*
4992  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
4993  * if nothing there.
4994  */
4995 static TransactionId
4997 {
      /*
       * NOTE(review): embedded lines 4996, 5005 and 5008 are missing from this
       * listing: the function name/parameters and the two statements that
       * bracket the head/tail reads.
       */
4998  int head,
4999  tail;
5000  int i;
5001 
5002  /*
5003  * Fetch head just once, since it may change while we loop.
5004  */
5006  tail = procArray->tailKnownAssignedXids;
5007  head = procArray->headKnownAssignedXids;
5009 
      /* The first valid slot scanning up from tail is returned as the oldest XID. */
5010  for (i = tail; i < head; i++)
5011  {
5012  /* Skip any gaps in the array */
5013  if (KnownAssignedXidsValid[i])
5014  return KnownAssignedXids[i];
5015  }
5016 
5017  return InvalidTransactionId;
5018 }
5019 
5020 /*
5021  * Display KnownAssignedXids to provide debug trail
5022  *
5023  * Currently this is only called within startup process, so we need no
5024  * special locking.
5025  *
5026  * Note this is pretty expensive, and much of the expense will be incurred
5027  * even if the elog message will get discarded. It's not currently called
5028  * in any performance-critical places, however, so no need to be tenser.
5029  */
5030 static void
5032 {
      /*
       * NOTE(review): embedded lines 5031 and 5034 are missing from this
       * listing: the function name/parameters ("trace_level" is used below)
       * and the declaration of "buf".
       */
5033  ProcArrayStruct *pArray = procArray;
5035  int head,
5036  tail,
5037  i;
      /* Counted independently from pArray->numKnownAssignedXids; both are logged. */
5038  int nxids = 0;
5039 
5040  tail = pArray->tailKnownAssignedXids;
5041  head = pArray->headKnownAssignedXids;
5042 
5043  initStringInfo(&buf);
5044 
      /* Append "[index]=xid " for every valid slot in [tail, head). */
5045  for (i = tail; i < head; i++)
5046  {
5047  if (KnownAssignedXidsValid[i])
5048  {
5049  nxids++;
5050  appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
5051  }
5052  }
5053 
5054  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
5055  nxids,
5056  pArray->numKnownAssignedXids,
5057  pArray->tailKnownAssignedXids,
5058  pArray->headKnownAssignedXids,
5059  buf.data);
5060 
      /* Free the text buffer that was set up by initStringInfo(). */
5061  pfree(buf.data);
5062 }
5063 
5064 /*
5065  * KnownAssignedXidsReset
5066  * Resets KnownAssignedXids to be empty
5067  */
5068 static void
5070 {
      /*
       * NOTE(review): the line with the function name and parameters (embedded
       * line 5069) is missing from this listing.
       */
5071  ProcArrayStruct *pArray = procArray;
5072 
5073  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
5074 
      /*
       * Empty the structure by zeroing the count and both pointers; the
       * KnownAssignedXids[] and KnownAssignedXidsValid[] contents are not
       * touched.
       */
5075  pArray->numKnownAssignedXids = 0;
5076  pArray->tailKnownAssignedXids = 0;
5077  pArray->headKnownAssignedXids = 0;
5078 
5079  LWLockRelease(ProcArrayLock);
5080 }
#define TransactionIdAdvance(dest)
Definition: transam.h:91
int slock_t
Definition: s_lock.h:934
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1000
#define NIL
Definition: pg_list.h:65
#define AmStartupProcess()
Definition: miscadmin.h:433
static TransactionId latestObservedXid
Definition: procarray.c:259
TransactionId oldest_considered_running
Definition: procarray.c:207
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:3195
TransactionId oldestRunningXid
Definition: standby.h:83
bool procArrayGroupMember
Definition: proc.h:216
uint64 snapXactCompletionCount
Definition: snapshot.h:216
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3357
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
#define PROCARRAY_MAXPROCS
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2881
static void ComputeXidHorizons(ComputeXidHorizonsResult *h)
Definition: procarray.c:1667
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1938
FullTransactionId latest_completed
Definition: procarray.c:187
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3465
#define DEBUG1
Definition: elog.h:25
#define likely(x)
Definition: c.h:272
TransactionId shared_oldest_nonremovable_raw
Definition: procarray.c:227
static void pgstat_report_wait_end(void)
Definition: wait_event.h:277
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:5031
#define GET_VXID_FROM_PGPROC(vxid, proc)
Definition: lock.h:81
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
BackendId backendId
Definition: proc.h:153
uint32 TransactionId
Definition: c.h:587
bool copied
Definition: snapshot.h:185
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:277
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
#define DEBUG3
Definition: elog.h:23
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:869
Oid GetUserId(void)
Definition: miscinit.c:478
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
XidCacheStatus * subxidStates
Definition: proc.h:327
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1920
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
static void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:681
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:3067
#define FullTransactionIdIsValid(x)
Definition: transam.h:55
PGPROC * MyProc
Definition: proc.c:68
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1325
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:101
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:365
int vacuum_defer_cleanup_age
Definition: standby.c:39
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:69
#define SpinLockInit(lock)
Definition: spin.h:60
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:590
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4843
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3283
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:98
XLogRecPtr lsn
Definition: snapshot.h:209
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2977
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:739
unsigned char uint8
Definition: c.h:439
static FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
Definition: procarray.c:4218
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:350
#define xc_by_my_xact_inc()
Definition: procarray.c:313
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid)
Definition: procarray.c:935
Oid roleId
Definition: proc.h:155
TransactionId oldestXid
Definition: transam.h:215
int errcode(int sqlerrcode)
Definition: elog.c:698
TransactionId RecentXmin
Definition: snapmgr.c:113
uint64 xactCompletionCount
Definition: transam.h:241
slock_t known_assigned_xids_lck
Definition: procarray.c:84
bool superuser(void)
Definition: superuser.c:46
PROC_HDR * ProcGlobal
Definition: proc.c:80
bool suboverflowed
Definition: snapshot.h:182
TransactionId * xids
Definition: standby.h:86
#define kill(pid, sig)
Definition: win32_port.h:454
uint8 statusFlags
Definition: proc.h:189
bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid)
Definition: procarray.c:4089
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
FullTransactionId latestCompletedXid
Definition: transam.h:231
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:110
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:8237
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition: transam.h:81
#define TransactionIdRetreat(dest)
Definition: transam.h:141
LocalTransactionId localTransactionId
Definition: lock.h:66
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:620
#define xc_by_child_xid_inc()
Definition: procarray.c:316
bool TransactionIdIsKnownCompleted(TransactionId transactionId)
Definition: transam.c:238
#define DEBUG4
Definition: elog.h:22
#define fprintf
Definition: port.h:220
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:4371
#define MAXAUTOVACPIDS
FullTransactionId nextXid
Definition: transam.h:213
uint32 regd_count
Definition: snapshot.h:205
#define OidIsValid(objectId)
Definition: c.h:710
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3824
TransactionId catalog_oldest_nonremovable
Definition: procarray.c:233
XidCacheStatus subxidStatus
Definition: proc.h:210
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4397
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4131
signed int int32
Definition: c.h:429
int trace_recovery(int trace_level)
Definition: elog.c:3609
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:65
bool overflowed
Definition: proc.h:43
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TransactionId TransactionXmin
Definition: snapmgr.c:112
TransactionId latestCompletedXid
Definition: standby.h:84
FullTransactionId definitely_needed
Definition: procarray.c:172
Definition: type.h:89
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1805
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:292
bool isBackgroundWorker
Definition: proc.h:160
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:75
#define xc_by_recent_xmin_inc()
Definition: procarray.c:311
#define xc_by_known_xact_inc()
Definition: procarray.c:312
static void GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
Definition: procarray.c:2023
bool MinimumActiveBackends(int min)
Definition: procarray.c:3412
static void KnownAssignedXidsRemovePreceding(TransactionId xid)
Definition: procarray.c:4846
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:3090
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3963
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:261
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3363
#define AssertTransactionIdInAllowableRange(xid)
Definition: transam.h:294
void pfree(void *pointer)
Definition: mcxt.c:1169
#define PROC_IN_VACUUM
Definition: proc.h:55
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
int CountDBConnections(Oid databaseid)
Definition: procarray.c:3495
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1944
#define ERROR
Definition: elog.h:46
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3847
#define FullTransactionIdIsNormal(x)
Definition: transam.h:58
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1635
bool delayChkpt
Definition: proc.h:187
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:853
#define lfirst_int(lc)
Definition: pg_list.h:170
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:308
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:525
#define FATAL
Definition: elog.h:49
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:11747
TransactionId slot_catalog_xmin
Definition: procarray.c:194
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3526
TransactionId xmin
Definition: proc.h:138
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define xc_by_main_xid_inc()
Definition: procarray.c:315
static bool GlobalVisTestShouldUpdate(GlobalVisState *state)
Definition: procarray.c:4014
static GlobalVisState GlobalVisSharedRels
Definition: procarray.c:273
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1375
static char * buf
Definition: pg_test_fsync.c:68
bool recoveryConflictPending
Definition: proc.h:167
#define xc_by_known_assigned_inc()
Definition: procarray.c:317
bool IsUnderPostmaster
Definition: globals.c:112
VariableCache ShmemVariableCache
Definition: varsup.c:34
int maxKnownAssignedXids
Definition: procarray.c:80
#define InvalidTransactionId
Definition: transam.h:31
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1383
TransactionId * xids
Definition: proc.h:321
static PGPROC * allProcs
Definition: procarray.c:252
Oid databaseId
Definition: proc.h:154
unsigned int uint32
Definition: c.h:441
TransactionId shared_oldest_nonremovable
Definition: procarray.c:216
TransactionId xmax
Definition: snapshot.h:158
TransactionId xmin
Definition: snapshot.h:157
static void KnownAssignedXidsReset(void)
Definition: procarray.c:5069
LOCK * waitLock
Definition: proc.h:179
int numKnownAssignedXids
Definition: procarray.c:81
static bool * KnownAssignedXidsValid
Definition: procarray.c:258
struct XidCache subxids
Definition: proc.h:212
TransactionId lastOverflowedXid
Definition: procarray.c:93
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
#define xc_by_latest_xid_inc()
Definition: procarray.c:314
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
#define INVALID_PGPROCNO
Definition: proc.h:80
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:168
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:4824
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:218
List * lappend_int(List *list, int datum)
Definition: list.c:354
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:261
Definition: proc.h:315
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
static ProcArrayStruct * procArray
Definition: procarray.c:250
#define WARNING
Definition: elog.h:40
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:71
signed char int8
Definition: c.h:427
static TransactionId ComputeXidHorizonsResultLastXmin
Definition: procarray.c:283
#define SpinLockRelease(lock)
Definition: spin.h:64
TransactionId replication_slot_xmin
Definition: procarray.c:96
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:3127
#define InvalidBackendId
Definition: backendid.h:23
static void MaintainLatestCompletedXid(TransactionId latestXid)
Definition: procarray.c:913
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:626
void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:1981
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:4924
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Oid MyDatabaseId
Definition: globals.c:88
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:4996
#define InvalidOid
Definition: postgres_ext.h:36
CommandId curcid
Definition: snapshot.h:187
#define ereport(elevel,...)
Definition: elog.h:157
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4197
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2003
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:101
#define PROC_IN_SAFE_IC
Definition: proc.h:56
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:48
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4183
TransactionId temp_oldest_nonremovable
Definition: procarray.c:245
#define TOTAL_MAX_CACHED_SUBXIDS
static TransactionId TransactionIdOlder(TransactionId a, TransactionId b)
Definition: transam.h:327
#define Assert(condition)
Definition: c.h:804
static TransactionId * KnownAssignedXids
Definition: procarray.c:257
BackendId backendId
Definition: lock.h:65
Definition: regguts.h:317
#define pg_read_barrier()
Definition: atomics.h:158
#define U64FromFullTransactionId(x)
Definition: transam.h:49
void CreateSharedProcArray(void)
Definition: procarray.c:394
#define FullTransactionIdFollowsOrEquals(a, b)
Definition: transam.h:54
bool takenDuringRecovery
Definition: snapshot.h:184
size_t Size
Definition: c.h:540
struct ComputeXidHorizonsResult ComputeXidHorizonsResult
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2140
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1134
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1203
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
Definition: procarray.c:4033
static TransactionId TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
Definition: transam.h:315
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:4938
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:4574
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1968
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2578
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:147
#define xc_no_overflow_inc()
Definition: procarray.c:318
bool EnableHotStandby
Definition: xlog.c:99
FullTransactionId maybe_needed
Definition: procarray.c:175
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
static void KnownAssignedXidsCompress(bool force)
Definition: procarray.c:4512
uint8 count
Definition: proc.h:41
int CountUserBackends(Oid roleid)
Definition: procarray.c:3566
TransactionId xid
Definition: proc.h:133
static FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
Definition: transam.h:353
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:394
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:4785
static GlobalVisState GlobalVisTempRels
Definition: procarray.c:276
int pgprocno
Definition: proc.h:150
TransactionId nextXid
Definition: standby.h:82
bool TransactionIdIsActive(TransactionId xid)
Definition: procarray.c:1557
#define xc_slow_answer_inc()
Definition: procarray.c:319
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:346
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:969
uint32 xcnt
Definition: snapshot.h:169
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
struct ProcArrayStruct ProcArrayStruct
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:4692
FullTransactionId GlobalVisTestNonRemovableFullHorizon(GlobalVisState *state)
Definition: procarray.c:4158
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:4798
#define elog(elevel,...)
Definition: elog.h:232
#define InvalidLocalTransactionId
Definition: lock.h:69
TransactionId data_oldest_nonremovable
Definition: procarray.c:239
int i
int pgxactoff
Definition: proc.h:148
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4409
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2816
bool IsBackendPid(int pid)
Definition: procarray.c:3162
#define pg_write_barrier()
Definition: atomics.h:159
ProcSignalReason
Definition: procsignal.h:30
static bool GetSnapshotDataReuse(Snapshot snapshot)
Definition: procarray.c:2057
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3802
#define unlikely(x)
Definition: c.h:273
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:2014
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2641
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:1241
TimestampTz whenTaken
Definition: snapshot.h:208
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:3694
PGPROC * allProcs
Definition: proc.h:318
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:102
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:3616
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:761
uint8 * statusFlags
Definition: proc.h:333
#define qsort(a, b, c, d)
Definition: port.h:504
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void GlobalVisUpdate(void)
Definition: procarray.c:4072
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1858
PGSemaphore sem
Definition: proc.h:127
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:3021
static GlobalVisState GlobalVisCatalogRels
Definition: procarray.c:274
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4169
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4300
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:74
static GlobalVisState GlobalVisDataRels
Definition: procarray.c:275
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2503
int tailKnownAssignedXids
Definition: procarray.c:82
TransactionId slot_xmin
Definition: procarray.c:193
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:266
Definition: proc.h:121
static void FullTransactionIdAdvance(FullTransactionId *dest)
Definition: transam.h:128
Definition: pg_list.h:50
int pid
Definition: proc.h:146
HotStandbyState standbyState
Definition: xlog.c:212
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:445
#define PROC_IS_AUTOVACUUM
Definition: proc.h:54
#define offsetof(type, field)
Definition: c.h:727
TransactionId procArrayGroupMemberXid
Definition: proc.h:224
Size ProcArrayShmemSize(void)
Definition: procarray.c:352
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1070
TransactionId * subxip
Definition: snapshot.h:180
uint32 active_count
Definition: snapshot.h:204
int headKnownAssignedXids
Definition: procarray.c:83
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:136
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
int32 subxcnt
Definition: snapshot.h:181
LocalTransactionId lxid
Definition: proc.h:143