PostgreSQL Source Code  git master
procarray.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  * POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of PGPROC substructures, as well as associated
8  * arrays in ProcGlobal, for all active backends. Although there are several
9  * uses for this, the principal one is as a means of determining the set of
10  * currently running transactions.
11  *
12  * Because of various subtle race conditions it is critical that a backend
13  * hold the correct locks while setting or clearing its xid (in
14  * ProcGlobal->xids[]/MyProc->xid). See notes in
15  * src/backend/access/transam/README.
16  *
17  * The process arrays now also include structures representing prepared
18  * transactions. The xid and subxids fields of these are valid, as are the
19  * myProcLocks lists. They can be distinguished from regular backend PGPROCs
20  * at need by checking for pid == 0.
21  *
22  * During hot standby, we also keep a list of XIDs representing transactions
23  * that are known to be running on the primary (or more precisely, were running
24  * as of the current point in the WAL stream). This list is kept in the
25  * KnownAssignedXids array, and is updated by watching the sequence of
26  * arriving XIDs. This is necessary because if we leave those XIDs out of
27  * snapshots taken for standby queries, then they will appear to be already
28  * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
29  * array represents standby processes, which by definition are not running
30  * transactions that have XIDs.
31  *
32  * It is perhaps possible for a backend on the primary to terminate without
33  * writing an abort record for its transaction. While that shouldn't really
34  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
35  * ourselves by pruning the array when a valid list of running XIDs arrives.
36  *
37  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
38  * Portions Copyright (c) 1994, Regents of the University of California
39  *
40  *
41  * IDENTIFICATION
42  * src/backend/storage/ipc/procarray.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <signal.h>
49 
50 #include "access/clog.h"
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/twophase.h"
54 #include "access/xact.h"
55 #include "access/xlog.h"
56 #include "catalog/catalog.h"
57 #include "catalog/pg_authid.h"
58 #include "commands/dbcommands.h"
59 #include "miscadmin.h"
60 #include "pgstat.h"
61 #include "storage/proc.h"
62 #include "storage/procarray.h"
63 #include "storage/spin.h"
64 #include "utils/acl.h"
65 #include "utils/builtins.h"
66 #include "utils/rel.h"
67 #include "utils/snapmgr.h"
68 
69 #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
70 
71 /* Our shared memory area */
72 typedef struct ProcArrayStruct
73 {
74  int numProcs; /* number of valid procs entries */
75  int maxProcs; /* allocated size of procs array */
76 
77  /*
78  * Known assigned XIDs handling
79  */
80  int maxKnownAssignedXids; /* allocated size of array */
81  int numKnownAssignedXids; /* current # of valid entries */
82  int tailKnownAssignedXids; /* index of oldest valid element */
83  int headKnownAssignedXids; /* index of newest element, + 1 */
84  slock_t known_assigned_xids_lck; /* protects head/tail pointers */
85 
86  /*
87  * Highest subxid that has been removed from KnownAssignedXids array to
88  * prevent overflow; or InvalidTransactionId if none. We track this for
89  * similar reasons to tracking overflowing cached subxids in PGPROC
90  * entries. Must hold exclusive ProcArrayLock to change this, and shared
91  * lock to read it.
92  */
94 
95  /* oldest xmin of any replication slot */
97  /* oldest catalog xmin of any replication slot */
99 
100  /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
103 
104 /*
105  * State for the GlobalVisTest* family of functions. Those functions can
106  * e.g. be used to decide if a deleted row can be removed without violating
107  * MVCC semantics: If the deleted row's xmax is not considered to be running
108  * by anyone, the row can be removed.
109  *
110  * To avoid slowing down GetSnapshotData(), we don't calculate a precise
111  * cutoff XID while building a snapshot (looking at the frequently changing
112  * xmins scales badly). Instead we compute two boundaries while building the
113  * snapshot:
114  *
115  * 1) definitely_needed, indicating that rows deleted by XIDs >=
116  * definitely_needed are definitely still visible.
117  *
118  * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
119  * definitely be removed
120  *
121  * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
122  * && XID < definitely_needed), the boundaries can be recomputed (using
123  * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
124  * maintaining an accurate value all the time.
125  *
126  * As it is not cheap to compute accurate boundaries, we limit the number of
127  * times that happens in short succession. See GlobalVisTestShouldUpdate().
128  *
129  *
130  * There are four backend lifetime instances of this struct, optimized for
131  * different types of relations. As e.g. a normal user defined table in one
132  * database is inaccessible to backends connected to another database, a test
133  * specific to a relation can be more aggressive than a test for a shared
134  * relation. Currently we track four different states:
135  *
136  * 1) GlobalVisSharedRels, which only considers an XID's
137  * effects visible-to-everyone if neither snapshots in any database, nor a
138  * replication slot's xmin, nor a replication slot's catalog_xmin might
139  * still consider XID as running.
140  *
141  * 2) GlobalVisCatalogRels, which only considers an XID's
142  * effects visible-to-everyone if neither snapshots in the current
143  * database, nor a replication slot's xmin, nor a replication slot's
144  * catalog_xmin might still consider XID as running.
145  *
146  * I.e. the difference to GlobalVisSharedRels is that
147  * snapshots in other databases are ignored.
148  *
149  * 3) GlobalVisDataRels, which only considers an XID's
150  * effects visible-to-everyone if neither snapshots in the current
151  * database, nor a replication slot's xmin consider XID as running.
152  *
153  * I.e. the difference to GlobalVisCatalogRels is that
154  * replication slot's catalog_xmin is not taken into account.
155  *
156  * 4) GlobalVisTempRels, which only considers the current session, as temp
157  * tables are not visible to other sessions.
158  *
159  * GlobalVisTestFor(relation) returns the appropriate state
160  * for the relation.
161  *
162  * The boundaries are FullTransactionIds instead of TransactionIds to avoid
163  * wraparound dangers. Otherwise there would, e.g., be no procarray state to
164  * prevent maybe_needed from becoming old enough after the GetSnapshotData()
165  * call.
166  *
167  * The typedef is in the header.
168  */
170 {
171  /* XIDs >= are considered running by some backend */
173 
174  /* XIDs < are not considered to be running by any backend */
176 };
177 
178 /*
179  * Result of ComputeXidHorizons().
180  */
182 {
183  /*
184  * The value of ShmemVariableCache->latestCompletedXid when
185  * ComputeXidHorizons() held ProcArrayLock.
186  */
188 
189  /*
190  * The same for procArray->replication_slot_xmin and
191  * procArray->replication_slot_catalog_xmin.
192  */
195 
196  /*
197  * Oldest xid that any backend might still consider running. This needs to
198  * include processes running VACUUM, in contrast to the normal visibility
199  * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
200  * determining visibility, but doesn't care about rows above its xmin being
201  * removed.
202  *
203  * This likely should only be needed to determine whether pg_subtrans can
204  * be truncated. It currently includes the effects of replication slots,
205  * for historical reasons. But that could likely be changed.
206  */
208 
209  /*
210  * Oldest xid for which deleted tuples need to be retained in shared
211  * tables.
212  *
213  * This includes the effects of replication slots. If that's not desired,
214  * look at shared_oldest_nonremovable_raw.
215  */
217 
218  /*
219  * Oldest xid that may be necessary to retain in shared tables. This is
220  * the same as shared_oldest_nonremovable, except that it is not affected by
221  * replication slot's catalog_xmin.
222  *
223  * This is mainly useful to be able to send the catalog_xmin to upstream
224  * streaming replication servers via hot_standby_feedback, so they can
225  * apply the limit only when accessing catalog tables.
226  */
228 
229  /*
230  * Oldest xid for which deleted tuples need to be retained in non-shared
231  * catalog tables.
232  */
234 
235  /*
236  * Oldest xid for which deleted tuples need to be retained in normal user
237  * defined tables.
238  */
240 
241  /*
242  * Oldest xid for which deleted tuples need to be retained in this
243  * session's temporary tables.
244  */
246 
248 
249 
251 
252 static PGPROC *allProcs;
253 
254 /*
255  * Bookkeeping for tracking emulated transactions in recovery
256  */
260 
261 /*
262  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
263  * the highest xid that might still be running that we don't have in
264  * KnownAssignedXids.
265  */
267 
268 /*
269  * State for visibility checks on different types of relations. See struct
270  * GlobalVisState for details. As shared, catalog, normal and temporary
271  * relations can have different horizons, one such state exists for each.
272  */
277 
278 /*
279  * This backend's RecentXmin at the last time the accurate xmin horizon was
280  * recomputed, or InvalidTransactionId if it has not. Used to limit how many
281  * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
282  */
284 
285 #ifdef XIDCACHE_DEBUG
286 
287 /*
 * Counters for XidCache measurement.
 *
 * These exist only when XIDCACHE_DEBUG is defined. Each counter has a
 * matching xc_*_inc() macro below that post-increments it; in non-debug
 * builds the same macro names expand to a no-op so that callers need no
 * #ifdefs of their own.
 */
288 static long xc_by_recent_xmin = 0;
289 static long xc_by_known_xact = 0;
290 static long xc_by_my_xact = 0;
291 static long xc_by_latest_xid = 0;
292 static long xc_by_main_xid = 0;
293 static long xc_by_child_xid = 0;
294 static long xc_by_known_assigned = 0;
295 static long xc_no_overflow = 0;
296 static long xc_slow_answer = 0;
297 
298 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
299 #define xc_by_known_xact_inc() (xc_by_known_xact++)
300 #define xc_by_my_xact_inc() (xc_by_my_xact++)
301 #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
302 #define xc_by_main_xid_inc() (xc_by_main_xid++)
303 #define xc_by_child_xid_inc() (xc_by_child_xid++)
304 #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
305 #define xc_no_overflow_inc() (xc_no_overflow++)
306 #define xc_slow_answer_inc() (xc_slow_answer++)
307 
308 static void DisplayXidCache(void);
309 #else /* !XIDCACHE_DEBUG: the increment macros expand to no-ops */
310 
311 #define xc_by_recent_xmin_inc() ((void) 0)
312 #define xc_by_known_xact_inc() ((void) 0)
313 #define xc_by_my_xact_inc() ((void) 0)
314 #define xc_by_latest_xid_inc() ((void) 0)
315 #define xc_by_main_xid_inc() ((void) 0)
316 #define xc_by_child_xid_inc() ((void) 0)
317 #define xc_by_known_assigned_inc() ((void) 0)
318 #define xc_no_overflow_inc() ((void) 0)
319 #define xc_slow_answer_inc() ((void) 0)
320 #endif /* XIDCACHE_DEBUG */
321 
322 /* Primitives for KnownAssignedXids array handling for standby */
323 static void KnownAssignedXidsCompress(bool force);
324 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
325  bool exclusive_lock);
326 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
327 static bool KnownAssignedXidExists(TransactionId xid);
328 static void KnownAssignedXidsRemove(TransactionId xid);
329 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
330  TransactionId *subxids);
332 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
334  TransactionId *xmin,
335  TransactionId xmax);
337 static void KnownAssignedXidsDisplay(int trace_level);
338 static void KnownAssignedXidsReset(void);
339 static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
340 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
341 static void MaintainLatestCompletedXid(TransactionId latestXid);
343 
345  TransactionId xid);
346 static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
347 
348 /*
349  * Report shared-memory space needed by CreateSharedProcArray.
350  */
351 Size
353 {
354  Size size;
355 
356  /* Size of the ProcArray structure itself */
357 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
358 
360  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
361 
362  /*
363  * During Hot Standby processing we have a data structure called
364  * KnownAssignedXids, created in shared memory. Local data structures are
365  * also created in various backends during GetSnapshotData(),
366  * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
367  * main structures created in those functions must be identically sized,
368  * since we may at times copy the whole of the data structures around. We
369  * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
370  *
371  * Ideally we'd only create this structure if we were actually doing hot
372  * standby in the current run, but we don't know that yet at the time
373  * shared memory is being set up.
374  */
375 #define TOTAL_MAX_CACHED_SUBXIDS \
376  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
377 
378  if (EnableHotStandby)
379  {
380  size = add_size(size,
381  mul_size(sizeof(TransactionId),
383  size = add_size(size,
384  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
385  }
386 
387  return size;
388 }
389 
390 /*
391  * Initialize the shared PGPROC array during postmaster startup.
392  */
393 void
395 {
396  bool found;
397 
398  /* Create or attach to the ProcArray shared structure */
399  procArray = (ProcArrayStruct *)
400  ShmemInitStruct("Proc Array",
402  mul_size(sizeof(int),
404  &found);
405 
406  if (!found)
407  {
408  /*
409  * We're the first - initialize.
410  */
411  procArray->numProcs = 0;
412  procArray->maxProcs = PROCARRAY_MAXPROCS;
414  procArray->numKnownAssignedXids = 0;
415  procArray->tailKnownAssignedXids = 0;
416  procArray->headKnownAssignedXids = 0;
422  }
423 
424  allProcs = ProcGlobal->allProcs;
425 
426  /* Create or attach to the KnownAssignedXids arrays too, if needed */
427  if (EnableHotStandby)
428  {
430  ShmemInitStruct("KnownAssignedXids",
431  mul_size(sizeof(TransactionId),
433  &found);
434  KnownAssignedXidsValid = (bool *)
435  ShmemInitStruct("KnownAssignedXidsValid",
436  mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
437  &found);
438  }
439 }
440 
441 /*
442  * Add the specified PGPROC to the shared array.
443  */
444 void
446 {
447  ProcArrayStruct *arrayP = procArray;
448  int index;
449 
450  /* See ProcGlobal comment explaining why both locks are held */
451  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
452  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
453 
454  if (arrayP->numProcs >= arrayP->maxProcs)
455  {
456  /*
457  * Oops, no room. (This really shouldn't happen, since there is a
458  * fixed supply of PGPROC structs too, and so we should have failed
459  * earlier.)
460  */
461  ereport(FATAL,
462  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
463  errmsg("sorry, too many clients already")));
464  }
465 
466  /*
467  * Keep the procs array sorted by (PGPROC *) so that we can utilize
468  * locality of references much better. This is useful while traversing the
469  * ProcArray because there is an increased likelihood of finding the next
470  * PGPROC structure in the cache.
471  *
472  * Since the occurrence of adding/removing a proc is much lower than the
473  * access to the ProcArray itself, the overhead should be marginal
474  */
475  for (index = 0; index < arrayP->numProcs; index++)
476  {
477  /*
478  * If we are the first PGPROC or if we have found our right position
479  * in the array, break
480  */
481  if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
482  break;
483  }
484 
485  memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
486  (arrayP->numProcs - index) * sizeof(*arrayP->pgprocnos));
487  memmove(&ProcGlobal->xids[index + 1], &ProcGlobal->xids[index],
488  (arrayP->numProcs - index) * sizeof(*ProcGlobal->xids));
489  memmove(&ProcGlobal->subxidStates[index + 1], &ProcGlobal->subxidStates[index],
490  (arrayP->numProcs - index) * sizeof(*ProcGlobal->subxidStates));
491  memmove(&ProcGlobal->statusFlags[index + 1], &ProcGlobal->statusFlags[index],
492  (arrayP->numProcs - index) * sizeof(*ProcGlobal->statusFlags));
493 
494  arrayP->pgprocnos[index] = proc->pgprocno;
495  ProcGlobal->xids[index] = proc->xid;
496  ProcGlobal->subxidStates[index] = proc->subxidStatus;
497  ProcGlobal->statusFlags[index] = proc->statusFlags;
498 
499  arrayP->numProcs++;
500 
501  for (; index < arrayP->numProcs; index++)
502  {
503  allProcs[arrayP->pgprocnos[index]].pgxactoff = index;
504  }
505 
506  /*
507  * Release in reversed acquisition order, to reduce frequency of having to
508  * wait for XidGenLock while holding ProcArrayLock.
509  */
510  LWLockRelease(XidGenLock);
511  LWLockRelease(ProcArrayLock);
512 }
513 
514 /*
515  * Remove the specified PGPROC from the shared array.
516  *
517  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
518  * array, and thus causing it to appear as "not running" anymore. In this
519  * case we must advance latestCompletedXid. (This is essentially the same
520  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
521  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
522  * twophase.c depends on the latter.)
523  */
524 void
526 {
527  ProcArrayStruct *arrayP = procArray;
528  int index;
529 
530 #ifdef XIDCACHE_DEBUG
531  /* dump stats at backend shutdown, but not prepared-xact end */
532  if (proc->pid != 0)
533  DisplayXidCache();
534 #endif
535 
536  /* See ProcGlobal comment explaining why both locks are held */
537  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
538  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
539 
540  Assert(ProcGlobal->allProcs[arrayP->pgprocnos[proc->pgxactoff]].pgxactoff == proc->pgxactoff);
541 
542  if (TransactionIdIsValid(latestXid))
543  {
545 
546  /* Advance global latestCompletedXid while holding the lock */
547  MaintainLatestCompletedXid(latestXid);
548 
549  /* Same with xactCompletionCount */
551 
552  ProcGlobal->xids[proc->pgxactoff] = 0;
555  }
556  else
557  {
558  /* Shouldn't be trying to remove a live transaction here */
560  }
561 
565  ProcGlobal->statusFlags[proc->pgxactoff] = 0;
566 
567  for (index = 0; index < arrayP->numProcs; index++)
568  {
569  if (arrayP->pgprocnos[index] == proc->pgprocno)
570  {
571  /* Keep the PGPROC array sorted. See notes above */
572  memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
573  (arrayP->numProcs - index - 1) * sizeof(*arrayP->pgprocnos));
574  memmove(&ProcGlobal->xids[index], &ProcGlobal->xids[index + 1],
575  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->xids));
576  memmove(&ProcGlobal->subxidStates[index], &ProcGlobal->subxidStates[index + 1],
577  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->subxidStates));
578  memmove(&ProcGlobal->statusFlags[index], &ProcGlobal->statusFlags[index + 1],
579  (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->statusFlags));
580 
581  arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
582  arrayP->numProcs--;
583 
584  /* adjust for removed PGPROC */
585  for (; index < arrayP->numProcs; index++)
586  allProcs[arrayP->pgprocnos[index]].pgxactoff--;
587 
588  /*
589  * Release in reversed acquisition order, to reduce frequency of
590  * having to wait for XidGenLock while holding ProcArrayLock.
591  */
592  LWLockRelease(XidGenLock);
593  LWLockRelease(ProcArrayLock);
594  return;
595  }
596  }
597 
598  /* Oops */
599  LWLockRelease(XidGenLock);
600  LWLockRelease(ProcArrayLock);
601 
602  elog(LOG, "failed to find proc %p in ProcArray", proc);
603 }
604 
605 
606 /*
607  * ProcArrayEndTransaction -- mark a transaction as no longer running
608  *
609  * This is used interchangeably for commit and abort cases. The transaction
610  * commit/abort must already be reported to WAL and pg_xact.
611  *
612  * proc is currently always MyProc, but we pass it explicitly for flexibility.
613  * latestXid is the latest Xid among the transaction's main XID and
614  * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
615  * the caller to pass latestXid, instead of computing it from the PGPROC's
616  * contents, because the subxid information in the PGPROC might be
617  * incomplete.)
618  */
619 void
621 {
622  if (TransactionIdIsValid(latestXid))
623  {
624  /*
625  * We must lock ProcArrayLock while clearing our advertised XID, so
626  * that we do not exit the set of "running" transactions while someone
627  * else is taking a snapshot. See discussion in
628  * src/backend/access/transam/README.
629  */
631 
632  /*
633  * If we can immediately acquire ProcArrayLock, we clear our own XID
634  * and release the lock. If not, use group XID clearing to improve
635  * efficiency.
636  */
637  if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
638  {
639  ProcArrayEndTransactionInternal(proc, latestXid);
640  LWLockRelease(ProcArrayLock);
641  }
642  else
643  ProcArrayGroupClearXid(proc, latestXid);
644  }
645  else
646  {
647  /*
648  * If we have no XID, we don't need to lock, since we won't affect
649  * anyone else's calculation of a snapshot. We might change their
650  * estimate of global xmin, but that's OK.
651  */
653  Assert(proc->subxidStatus.count == 0);
655 
657  proc->xmin = InvalidTransactionId;
658  proc->delayChkpt = false; /* be sure this is cleared in abort */
659  proc->recoveryConflictPending = false;
660 
661  /* must be cleared with xid/xmin: */
662  /* avoid unnecessarily dirtying shared cachelines */
664  {
665  Assert(!LWLockHeldByMe(ProcArrayLock));
666  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
670  LWLockRelease(ProcArrayLock);
671  }
672  }
673 }
674 
675 /*
676  * Mark a write transaction as no longer running.
677  *
678  * We don't do any locking here; caller must handle that.
679  */
680 static inline void
682 {
683  size_t pgxactoff = proc->pgxactoff;
684 
685  /*
686  * Note: we need exclusive lock here because we're going to change other
687  * processes' PGPROC entries.
688  */
689  Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
691  Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
692 
693  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
694  proc->xid = InvalidTransactionId;
696  proc->xmin = InvalidTransactionId;
697  proc->delayChkpt = false; /* be sure this is cleared in abort */
698  proc->recoveryConflictPending = false;
699 
700  /* must be cleared with xid/xmin: */
701  /* avoid unnecessarily dirtying shared cachelines */
703  {
706  }
707 
708  /* Clear the subtransaction-XID cache too while holding the lock */
709  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
711  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
712  {
713  ProcGlobal->subxidStates[pgxactoff].count = 0;
714  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
715  proc->subxidStatus.count = 0;
716  proc->subxidStatus.overflowed = false;
717  }
718 
719  /* Also advance global latestCompletedXid while holding the lock */
720  MaintainLatestCompletedXid(latestXid);
721 
722  /* Same with xactCompletionCount */
724 }
725 
726 /*
727  * ProcArrayGroupClearXid -- group XID clearing
728  *
729  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
730  * commit time, add ourselves to a list of processes that need their XIDs
731  * cleared. The first process to add itself to the list will acquire
732  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
733  * on behalf of all group members. This avoids a great deal of contention
734  * around ProcArrayLock when many processes are trying to commit at once,
735  * since the lock need not be repeatedly handed off from one committing
736  * process to the next.
737  */
738 static void
740 {
741  PROC_HDR *procglobal = ProcGlobal;
742  uint32 nextidx;
743  uint32 wakeidx;
744 
745  /* We should definitely have an XID to clear. */
747 
748  /* Add ourselves to the list of processes needing a group XID clear. */
749  proc->procArrayGroupMember = true;
750  proc->procArrayGroupMemberXid = latestXid;
751  nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
752  while (true)
753  {
754  pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
755 
757  &nextidx,
758  (uint32) proc->pgprocno))
759  break;
760  }
761 
762  /*
763  * If the list was not empty, the leader will clear our XID. It is
764  * impossible to have followers without a leader because the first process
765  * that has added itself to the list will always have nextidx as
766  * INVALID_PGPROCNO.
767  */
768  if (nextidx != INVALID_PGPROCNO)
769  {
770  int extraWaits = 0;
771 
772  /* Sleep until the leader clears our XID. */
774  for (;;)
775  {
776  /* acts as a read barrier */
777  PGSemaphoreLock(proc->sem);
778  if (!proc->procArrayGroupMember)
779  break;
780  extraWaits++;
781  }
783 
785 
786  /* Fix semaphore count for any absorbed wakeups */
787  while (extraWaits-- > 0)
788  PGSemaphoreUnlock(proc->sem);
789  return;
790  }
791 
792  /* We are the leader. Acquire the lock on behalf of everyone. */
793  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
794 
795  /*
796  * Now that we've got the lock, clear the list of processes waiting for
797  * group XID clearing, saving a pointer to the head of the list. Trying
798  * to pop elements one at a time could lead to an ABA problem.
799  */
800  nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
802 
803  /* Remember head of list so we can perform wakeups after dropping lock. */
804  wakeidx = nextidx;
805 
806  /* Walk the list and clear all XIDs. */
807  while (nextidx != INVALID_PGPROCNO)
808  {
809  PGPROC *proc = &allProcs[nextidx];
810 
812 
813  /* Move to next proc in list. */
814  nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
815  }
816 
817  /* We're done with the lock now. */
818  LWLockRelease(ProcArrayLock);
819 
820  /*
821  * Now that we've released the lock, go back and wake everybody up. We
822  * don't do this under the lock so as to keep lock hold times to a
823  * minimum. The system calls we need to perform to wake other processes
824  * up are probably much slower than the simple memory writes we did while
825  * holding the lock.
826  */
827  while (wakeidx != INVALID_PGPROCNO)
828  {
829  PGPROC *proc = &allProcs[wakeidx];
830 
831  wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
833 
834  /* ensure all previous writes are visible before follower continues. */
836 
837  proc->procArrayGroupMember = false;
838 
839  if (proc != MyProc)
840  PGSemaphoreUnlock(proc->sem);
841  }
842 }
843 
844 /*
845  * ProcArrayClearTransaction -- clear the transaction fields
846  *
847  * This is used after successfully preparing a 2-phase transaction. We are
848  * not actually reporting the transaction's XID as no longer running --- it
849  * will still appear as running because the 2PC's gxact is in the ProcArray
850  * too. We just have to clear out our own PGPROC.
851  */
852 void
854 {
855  size_t pgxactoff;
856 
857  /*
858  * Currently we need to lock ProcArrayLock exclusively here, as we
859  * increment xactCompletionCount below. We also need it at least in shared
860  * mode for pgproc->pgxactoff to stay the same below.
861  *
862  * We could however, as this action does not actually change anyone's view
863  * of the set of running XIDs (our entry is duplicate with the gxact that
864  * has already been inserted into the ProcArray), lower the lock level to
865  * shared if we were to make xactCompletionCount an atomic variable. But
866  * that doesn't seem worth it currently, as a 2PC commit is heavyweight
867  * enough for this not to be the bottleneck. If it ever becomes a
868  * bottleneck it may also be worth considering to combine this with the
869  * subsequent ProcArrayRemove()
870  */
871  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
872 
873  pgxactoff = proc->pgxactoff;
874 
875  ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
876  proc->xid = InvalidTransactionId;
877 
879  proc->xmin = InvalidTransactionId;
880  proc->recoveryConflictPending = false;
881 
883  Assert(!proc->delayChkpt);
884 
885  /*
886  * Need to increment completion count even though transaction hasn't
887  * really committed yet. The reason for that is that GetSnapshotData()
888  * omits the xid of the current transaction, thus without the increment we
889  * otherwise could end up reusing the snapshot later. Which would be bad,
890  * because it might not count the prepared transaction as running.
891  */
893 
894  /* Clear the subtransaction-XID cache too */
895  Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
897  if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
898  {
899  ProcGlobal->subxidStates[pgxactoff].count = 0;
900  ProcGlobal->subxidStates[pgxactoff].overflowed = false;
901  proc->subxidStatus.count = 0;
902  proc->subxidStatus.overflowed = false;
903  }
904 
905  LWLockRelease(ProcArrayLock);
906 }
907 
908 /*
909  * Update ShmemVariableCache->latestCompletedXid to point to latestXid if
910  * currently older.
911  */
912 static void
914 {
916 
917  Assert(FullTransactionIdIsValid(cur_latest));
919  Assert(LWLockHeldByMe(ProcArrayLock));
920 
921  if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
922  {
924  FullXidRelativeTo(cur_latest, latestXid);
925  }
926 
929 }
930 
931 /*
932  * Same as MaintainLatestCompletedXid, except for use during WAL replay.
933  */
934 static void
936 {
938  FullTransactionId rel;
939 
941  Assert(LWLockHeldByMe(ProcArrayLock));
942 
943  /*
944  * Need a FullTransactionId to compare latestXid with. Can't rely on
945  * latestCompletedXid to be initialized in recovery. But in recovery it's
946  * safe to access nextXid without a lock for the startup process.
947  */
950 
951  if (!FullTransactionIdIsValid(cur_latest) ||
952  TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
953  {
955  FullXidRelativeTo(rel, latestXid);
956  }
957 
959 }
960 
961 /*
962  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
963  *
964  * Remember up to where the startup process initialized the CLOG and subtrans
965  * so we can ensure it's initialized gaplessly up to the point where necessary
966  * while in recovery.
967  */
968 void
970 {
972  Assert(TransactionIdIsNormal(initializedUptoXID));
973 
974  /*
975  * we set latestObservedXid to the xid SUBTRANS has been initialized up
976  * to, so we can extend it from that point onwards in
977  * RecordKnownAssignedTransactionIds, and when we get consistent in
978  * ProcArrayApplyRecoveryInfo().
979  */
980  latestObservedXid = initializedUptoXID;
982 }
983 
984 /*
985  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
986  *
987  * Takes us through 3 states: Initialized, Pending and Ready.
988  * Normal case is to go all the way to Ready straight away, though there
989  * are atypical cases where we need to take it in steps.
990  *
991  * Use the data about running transactions on the primary to create the initial
992  * state of KnownAssignedXids. We also use these records to regularly prune
993  * KnownAssignedXids because we know it is possible that some transactions
994  * with FATAL errors fail to write abort records, which could cause eventual
995  * overflow.
996  *
997  * See comments for LogStandbySnapshot().
998  */
999 void
1001 {
1002  TransactionId *xids;	/* temporary copy of the still-running xids */
1003  int nxids;	/* number of entries stored in xids[] */
1004  int i;
1005 
1007  Assert(TransactionIdIsValid(running->nextXid));
1010 
1011  /*
1012  * Remove stale transactions, if any.
1013  */
1015 
1016  /*
1017  * Remove stale locks, if any.
1018  */
1020 
1021  /*
1022  * If our snapshot is already valid, nothing else to do...
1023  */
1025  return;
1026 
1027  /*
1028  * If our initial RunningTransactionsData had an overflowed snapshot then
1029  * we knew we were missing some subxids from our snapshot. If we continue
1030  * to see overflowed snapshots then we might never be able to start up, so
1031  * we make another test to see if our snapshot is now valid. We know that
1032  * the missing subxids are equal to or earlier than nextXid. After we
1033  * initialise we continue to apply changes during recovery, so once the
1034  * oldestRunningXid is later than the nextXid from the initial snapshot we
1035  * know that we no longer have missing information and can mark the
1036  * snapshot as valid.
1037  */
1039  {
1040  /*
1041  * If the snapshot isn't overflowed or if it's empty we can reset our
1042  * pending state and use this snapshot instead.
1043  */
1044  if (!running->subxid_overflow || running->xcnt == 0)
1045  {
1046  /*
1047  * If we have already collected known assigned xids, we need to
1048  * throw them away before we apply the recovery snapshot.
1049  */
1052  }
1053  else
1054  {
1056  running->oldestRunningXid))
1057  {
1060  "recovery snapshots are now enabled");
1061  }
1062  else
1064  "recovery snapshot waiting for non-overflowed snapshot or "
1065  "until oldest active xid on standby is at least %u (now %u)",
1067  running->oldestRunningXid);
1068  return;
1069  }
1070  }
1071 
1073 
1074  /*
1075  * NB: this can be reached at least twice, so make sure new code can deal
1076  * with that.
1077  */
1078 
1079  /*
1080  * Nobody else is running yet, but take locks anyhow
1081  */
1082  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1083 
1084  /*
1085  * KnownAssignedXids is sorted so we cannot just add the xids, we have to
1086  * sort them first.
1087  *
1088  * Some of the new xids are top-level xids and some are subtransactions.
1089  * We don't call SubTransSetParent because it doesn't matter yet. If we
1090  * aren't overflowed then all xids will fit in snapshot and so we don't
1091  * need subtrans. If we later overflow, an xid assignment record will add
1092  * xids to subtrans. If RunningTransactionsData is overflowed then we
1093  * don't have enough information to correctly update subtrans anyway.
1094  */
1095 
1096  /*
1097  * Allocate a temporary array to avoid modifying the array passed as
1098  * argument.
1099  */
1100  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
1101 
1102  /*
1103  * Add to the temp array any xids which have not already completed.
1104  */
1105  nxids = 0;
1106  for (i = 0; i < running->xcnt + running->subxcnt; i++)
1107  {
1108  TransactionId xid = running->xids[i];
1109 
1110  /*
1111  * The running-xacts snapshot can contain xids that were still visible
1112  * in the procarray when the snapshot was taken, but were already
1113  * WAL-logged as completed. They're not running anymore, so ignore
1114  * them.
1115  */
1117  continue;
1118 
1119  xids[nxids++] = xid;
1120  }
1121 
1122  if (nxids > 0)
1123  {
1124  if (procArray->numKnownAssignedXids != 0)
1125  {
1126  LWLockRelease(ProcArrayLock);	/* drop the lock before raising the error */
1127  elog(ERROR, "KnownAssignedXids is not empty");
1128  }
1129 
1130  /*
1131  * Sort the array so that we can add them safely into
1132  * KnownAssignedXids.
1133  */
1134  qsort(xids, nxids, sizeof(TransactionId), xidComparator);
1135 
1136  /*
1137  * Add the sorted snapshot into KnownAssignedXids. The running-xacts
1138  * snapshot may include duplicated xids because of prepared
1139  * transactions, so ignore them.
1140  */
1141  for (i = 0; i < nxids; i++)
1142  {
1143  if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
1144  {
1145  elog(DEBUG1,
1146  "found duplicated transaction %u for KnownAssignedXids insertion",
1147  xids[i]);
1148  continue;
1149  }
1150  KnownAssignedXidsAdd(xids[i], xids[i], true);
1151  }
1152 
1154  }
1155 
1156  pfree(xids);
1157 
1158  /*
1159  * latestObservedXid is at least set to the point where SUBTRANS was
1160  * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
1161  * RecordKnownAssignedTransactionIds() was called for. Initialize
1162  * subtrans from thereon, up to nextXid - 1.
1163  *
1164  * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
1165  * because we've just added xids to the known assigned xids machinery that
1166  * haven't gone through RecordKnownAssignedTransactionId().
1167  */
1171  {
1174  }
1175  TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
1176 
1177  /* ----------
1178  * Now we've got the running xids we need to set the global values that
1179  * are used to track snapshots as they evolve further.
1180  *
1181  * - latestCompletedXid which will be the xmax for snapshots
1182  * - lastOverflowedXid which shows whether snapshots overflow
1183  * - nextXid
1184  *
1185  * If the snapshot overflowed, then we still initialise with what we know,
1186  * but the recovery snapshot isn't fully valid yet because we know there
1187  * are some subxids missing. We don't know the specific subxids that are
1188  * missing, so conservatively assume the last one is latestObservedXid.
1189  * ----------
1190  */
1191  if (running->subxid_overflow)
1192  {
1194 
1196  procArray->lastOverflowedXid = latestObservedXid;
1197  }
1198  else
1199  {
1201 
1203  }
1204 
1205  /*
1206  * If a transaction wrote a commit record in the gap between taking and
1207  * logging the snapshot then latestCompletedXid may already be higher than
1208  * the value from the snapshot, so check before we use the incoming value.
1209  * It also might not yet be set at all.
1210  */
1212 
1213  LWLockRelease(ProcArrayLock);
1214 
1215  /* ShmemVariableCache->nextXid must be beyond any observed xid. */
1217 
1219 
1222  elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
1223  else
1225  "recovery snapshot waiting for non-overflowed snapshot or "
1226  "until oldest active xid on standby is at least %u (now %u)",
1228  running->oldestRunningXid);
1229 }
1230 
1231 /*
1232  * ProcArrayApplyXidAssignment
1233  * Process an XLOG_XACT_ASSIGNMENT WAL record
      *
      * Records topxid as the pg_subtrans parent of each of the nsubxids entries
      * in subxids[]. The later steps run under ProcArrayLock held exclusively:
      * the subxids are removed from known-assigned-xacts and lastOverflowedXid
      * is advanced if it is older than the newest xid in the record.
1234  */
1235 void
1237  int nsubxids, TransactionId *subxids)
1238 {
1239  TransactionId max_xid;	/* TransactionIdLatest() of topxid and subxids[] */
1240  int i;
1241 
1243 
1244  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
1245 
1246  /*
1247  * Mark all the subtransactions as observed.
1248  *
1249  * NOTE: This will fail if the subxid contains too many previously
1250  * unobserved xids to fit into known-assigned-xids. That shouldn't happen
1251  * as the code stands, because xid-assignment records should never contain
1252  * more than PGPROC_MAX_CACHED_SUBXIDS entries.
1253  */
1255 
1256  /*
1257  * Notice that we update pg_subtrans with the top-level xid, rather than
1258  * the parent xid. This is a difference between normal processing and
1259  * recovery, yet is still correct in all cases. The reason is that
1260  * subtransaction commit is not marked in clog until commit processing, so
1261  * all aborted subtransactions have already been clearly marked in clog.
1262  * As a result we are able to refer directly to the top-level
1263  * transaction's state rather than skipping through all the intermediate
1264  * states in the subtransaction tree. This should be the first time we
1265  * have attempted to SubTransSetParent().
1266  */
1267  for (i = 0; i < nsubxids; i++)
1268  SubTransSetParent(subxids[i], topxid);
1269 
1270  /* KnownAssignedXids isn't maintained yet, so we're done for now */
1272  return;
1273 
1274  /*
1275  * Uses same locking as transaction commit
1276  */
1277  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1278 
1279  /*
1280  * Remove subxids from known-assigned-xacts.
1281  */
1283 
1284  /*
1285  * Advance lastOverflowedXid to be at least the last of these subxids.
1286  */
1287  if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
1288  procArray->lastOverflowedXid = max_xid;
1289 
1290  LWLockRelease(ProcArrayLock);
1291 }
1292 
1293 /*
1294  * TransactionIdIsInProgress -- is given transaction running in some backend
1295  *
1296  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1297  * there are four possibilities for finding a running transaction:
1298  *
1299  * 1. The given Xid is a main transaction Id. We will find this out cheaply
1300  * by looking at ProcGlobal->xids.
1301  *
1302  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1303  * We can find this out cheaply too.
1304  *
1305  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1306  * if the Xid is running on the primary.
1307  *
1308  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1309  * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
1310  * This is the slowest way, but sadly it has to be done always if the others
1311  * failed, unless we see that the cached subxact sets are complete (none have
1312  * overflowed).
1313  *
1314  * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
1315  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1316  * This buys back some concurrency (and we can't retrieve the main Xids from
1317  * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
1318  */
1319 bool
1321 {
1322  static TransactionId *xids = NULL;	/* workspace, malloc'd on first use and kept */
1323  static TransactionId *other_xids;	/* set from ProcGlobal->xids before the scan */
1324  XidCacheStatus *other_subxidstates;
1325  int nxids = 0;	/* number of candidate parent xids saved in xids[] */
1326  ProcArrayStruct *arrayP = procArray;
1327  TransactionId topxid;
1328  TransactionId latestCompletedXid;
1329  int mypgxactoff;
1330  size_t numProcs;
1331  int j;
1332 
1333  /*
1334  * Don't bother checking a transaction older than RecentXmin; it could not
1335  * possibly still be running. (Note: in particular, this guarantees that
1336  * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1337  * running.)
1338  */
1340  {
1342  return false;
1343  }
1344 
1345  /*
1346  * We may have just checked the status of this transaction, so if it is
1347  * already known to be completed, we can fall out without any access to
1348  * shared memory.
1349  */
1351  {
1353  return false;
1354  }
1355 
1356  /*
1357  * Also, we can handle our own transaction (and subtransactions) without
1358  * any access to shared memory.
1359  */
1361  {
1363  return true;
1364  }
1365 
1366  /*
1367  * If first time through, get workspace to remember main XIDs in. We
1368  * malloc it permanently to avoid repeated palloc/pfree overhead.
1369  */
1370  if (xids == NULL)
1371  {
1372  /*
1373  * In hot standby mode, reserve enough space to hold all xids in the
1374  * known-assigned list. If we later finish recovery, we no longer need
1375  * the bigger array, but we don't bother to shrink it.
1376  */
1377  int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1378 
1379  xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1380  if (xids == NULL)
1381  ereport(ERROR,
1382  (errcode(ERRCODE_OUT_OF_MEMORY),
1383  errmsg("out of memory")));
1384  }
1385 
1386  other_xids = ProcGlobal->xids;
1387  other_subxidstates = ProcGlobal->subxidStates;
1388 
1389  LWLockAcquire(ProcArrayLock, LW_SHARED);
1390 
1391  /*
1392  * Now that we have the lock, we can check latestCompletedXid; if the
1393  * target Xid is after that, it's surely still running.
1394  */
1395  latestCompletedXid =
1397  if (TransactionIdPrecedes(latestCompletedXid, xid))
1398  {
1399  LWLockRelease(ProcArrayLock);
1401  return true;
1402  }
1403 
1404  /* No shortcuts, gotta grovel through the array */
1405  mypgxactoff = MyProc->pgxactoff;
1406  numProcs = arrayP->numProcs;
1407  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
1408  {
1409  int pgprocno;
1410  PGPROC *proc;
1411  TransactionId pxid;
1412  int pxids;
1413 
1414  /* Ignore ourselves --- dealt with it above */
1415  if (pgxactoff == mypgxactoff)
1416  continue;
1417 
1418  /* Fetch xid just once - see GetNewTransactionId */
1419  pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
1420 
1421  if (!TransactionIdIsValid(pxid))
1422  continue;
1423 
1424  /*
1425  * Step 1: check the main Xid
1426  */
1427  if (TransactionIdEquals(pxid, xid))
1428  {
1429  LWLockRelease(ProcArrayLock);
1431  return true;
1432  }
1433 
1434  /*
1435  * We can ignore main Xids that are younger than the target Xid, since
1436  * the target could not possibly be their child.
1437  */
1438  if (TransactionIdPrecedes(xid, pxid))
1439  continue;
1440 
1441  /*
1442  * Step 2: check the cached child-Xids arrays
1443  */
1444  pxids = other_subxidstates[pgxactoff].count;
1445  pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
1446  pgprocno = arrayP->pgprocnos[pgxactoff];
1447  proc = &allProcs[pgprocno];
1448  for (j = pxids - 1; j >= 0; j--)
1449  {
1450  /* Fetch xid just once - see GetNewTransactionId */
1451  TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
1452 
1453  if (TransactionIdEquals(cxid, xid))
1454  {
1455  LWLockRelease(ProcArrayLock);
1457  return true;
1458  }
1459  }
1460 
1461  /*
1462  * Save the main Xid for step 4. We only need to remember main Xids
1463  * that have uncached children. (Note: there is no race condition
1464  * here because the overflowed flag cannot be cleared, only set, while
1465  * we hold ProcArrayLock. So we can't miss an Xid that we need to
1466  * worry about.)
1467  */
1468  if (other_subxidstates[pgxactoff].overflowed)
1469  xids[nxids++] = pxid;
1470  }
1471 
1472  /*
1473  * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1474  * in the list must be treated as running.
1475  */
1476  if (RecoveryInProgress())
1477  {
1478  /* none of the PGPROC entries should have XIDs in hot standby mode */
1479  Assert(nxids == 0);
1480 
1481  if (KnownAssignedXidExists(xid))
1482  {
1483  LWLockRelease(ProcArrayLock);
1485  return true;
1486  }
1487 
1488  /*
1489  * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1490  * too. Fetch all xids from KnownAssignedXids that are lower than
1491  * xid, since if xid is a subtransaction its parent will always have a
1492  * lower value. Note we will collect both main and subXIDs here, but
1493  * there's no help for it.
1494  */
1495  if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1496  nxids = KnownAssignedXidsGet(xids, xid);
1497  }
1498 
1499  LWLockRelease(ProcArrayLock);
1500 
1501  /*
1502  * If none of the relevant caches overflowed, we know the Xid is not
1503  * running without even looking at pg_subtrans.
1504  */
1505  if (nxids == 0)
1506  {
1508  return false;
1509  }
1510 
1511  /*
1512  * Step 4: have to check pg_subtrans.
1513  *
1514  * At this point, we know it's either a subtransaction of one of the Xids
1515  * in xids[], or it's not running. If it's an already-failed
1516  * subtransaction, we want to say "not running" even though its parent may
1517  * still be running. So first, check pg_xact to see if it's been aborted.
1518  */
1520 
1521  if (TransactionIdDidAbort(xid))
1522  return false;
1523 
1524  /*
1525  * It isn't aborted, so check whether the transaction tree it belongs to
1526  * is still running (or, more precisely, whether it was running when we
1527  * held ProcArrayLock).
1528  */
1529  topxid = SubTransGetTopmostTransaction(xid);
1530  Assert(TransactionIdIsValid(topxid));
1531  if (!TransactionIdEquals(topxid, xid))
1532  {
1533  for (int i = 0; i < nxids; i++)
1534  {
1535  if (TransactionIdEquals(xids[i], topxid))
1536  return true;
1537  }
1538  }
1539 
1540  return false;
1541 }
1542 
1543 /*
1544  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1545  *
1546  * This differs from TransactionIdIsInProgress in that it ignores prepared
1547  * transactions, as well as transactions running on the primary if we're in
1548  * hot standby. Also, we ignore subtransactions since that's not needed
1549  * for current uses.
      *
      * The proc array is scanned while holding ProcArrayLock in shared mode.
1550  */
1551 bool
1553 {
1554  bool result = false;
1555  ProcArrayStruct *arrayP = procArray;
1556  TransactionId *other_xids = ProcGlobal->xids;
1557  int i;
1558 
1559  /*
1560  * Don't bother checking a transaction older than RecentXmin; it could not
1561  * possibly still be running.
1562  */
1564  return false;
1565 
1566  LWLockAcquire(ProcArrayLock, LW_SHARED);
1567 
1568  for (i = 0; i < arrayP->numProcs; i++)
1569  {
1570  int pgprocno = arrayP->pgprocnos[i];
1571  PGPROC *proc = &allProcs[pgprocno];
1572  TransactionId pxid;
1573 
1574  /* Fetch xid just once - see GetNewTransactionId */
1575  pxid = UINT32_ACCESS_ONCE(other_xids[i]);
1576 
1577  if (!TransactionIdIsValid(pxid))
1578  continue;
1579 
1580  if (proc->pid == 0)
1581  continue; /* ignore prepared transactions */
1582 
1583  if (TransactionIdEquals(pxid, xid))
1584  {
1585  result = true;
1586  break;
1587  }
1588  }
1589 
1590  LWLockRelease(ProcArrayLock);
1591 
1592  return result;
1593 }
1594 
1595 
1596 /*
1597  * Determine XID horizons.
1598  *
1599  * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
1600  * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
1601  * well as "internally" by GlobalVisUpdate() (see comment above struct
1602  * GlobalVisState).
1603  *
1604  * See the definition of ComputedXidHorizonsResult for the various computed
1605  * horizons.
1606  *
1607  * For VACUUM separate horizons (used to decide which deleted tuples must
1608  * be preserved), for shared and non-shared tables are computed. For shared
1609  * relations backends in all databases must be considered, but for non-shared
1610  * relations that's not required, since only backends in my own database could
1611  * ever see the tuples in them. Also, we can ignore concurrently running lazy
1612  * VACUUMs because (a) they must be working on other tables, and (b) they
1613  * don't need to do snapshot-based lookups.
1614  *
1615  * This also computes a horizon used to truncate pg_subtrans. For that
1616  * backends in all databases have to be considered, and concurrently running
1617  * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
1618  * accesses.
1619  *
1620  * Note: we include all currently running xids in the set of considered xids.
1621  * This ensures that if a just-started xact has not yet set its snapshot,
1622  * when it does set the snapshot it cannot set xmin less than what we compute.
1623  * See notes in src/backend/access/transam/README.
1624  *
1625  * Note: despite the above, it's possible for the calculated values to move
1626  * backwards on repeated calls. The calculated values are conservative, so
1627  * that anything older is definitely not considered as running by anyone
1628  * anymore, but the exact values calculated depend on a number of things. For
1629  * example, if there are no transactions running in the current database, the
1630  * horizon for normal tables will be latestCompletedXid. If a transaction
1631  * begins after that, its xmin will include in-progress transactions in other
1632  * databases that started earlier, so another call will return a lower value.
1633  * Nonetheless it is safe to vacuum a table in the current database with the
1634  * first result. There are also replication-related effects: a walsender
1635  * process can set its xmin based on transactions that are no longer running
1636  * on the primary but are still being replayed on the standby, thus possibly
1637  * making the values go backwards. In this case there is a possibility that
1638  * we lose data that the standby would like to have, but unless the standby
1639  * uses a replication slot to make its xmin persistent there is little we can
1640  * do about that --- data is only protected if the walsender runs continuously
1641  * while queries are executed on the standby. (The Hot Standby code deals
1642  * with such cases by failing standby queries that needed to access
1643  * already-removed data, so there's no integrity bug.) The computed values
1644  * are also adjusted with vacuum_defer_cleanup_age, so increasing that setting
1645  * on the fly is another easy way to make horizons move backwards, with no
1646  * consequences for data integrity.
1647  *
1648  * Note: the approximate horizons (see definition of GlobalVisState) are
1649  * updated by the computations done here. That's currently required for
1650  * correctness and a small optimization. Without doing so it's possible that
1651  * heap vacuum's call to heap_page_prune() uses a more conservative horizon
1652  * than later when deciding which tuples can be removed - which the code
1653  * doesn't expect (breaking HOT).
1654  */
1655 static void
1657 {
1658  ProcArrayStruct *arrayP = procArray;
1659  TransactionId kaxmin;	/* oldest xid in KnownAssignedXids; only set if in_recovery */
1660  bool in_recovery = RecoveryInProgress();
1661  TransactionId *other_xids = ProcGlobal->xids;
1662 
1663  /* inferred after ProcArrayLock is released */
1665 
1666  LWLockAcquire(ProcArrayLock, LW_SHARED);
1667 
1669 
1670  /*
1671  * We initialize the MIN() calculation with latestCompletedXid + 1. This
1672  * is a lower bound for the XIDs that might appear in the ProcArray later,
1673  * and so protects us against overestimating the result due to future
1674  * additions.
1675  */
1676  {
1677  TransactionId initial;
1678 
1680  Assert(TransactionIdIsValid(initial));
1681  TransactionIdAdvance(initial);
1682 
1683  h->oldest_considered_running = initial;
1684  h->shared_oldest_nonremovable = initial;
1685  h->data_oldest_nonremovable = initial;
1686 
1687  /*
1688  * Only modifications made by this backend affect the horizon for
1689  * temporary relations. Instead of a check in each iteration of the
1690  * loop over all PGPROCs it is cheaper to just initialize to the
1691  * current top-level xid, if any.
1692  *
1693  * Without an assigned xid we could use a horizon as aggressive as
1694  * ReadNewTransactionid(), but we can get away with the much cheaper
1695  * latestCompletedXid + 1: If this backend has no xid, there, by
1696  * definition, can't be any newer changes in the temp table than
1697  * latestCompletedXid.
1698  */
1701  else
1702  h->temp_oldest_nonremovable = initial;
1703  }
1704 
1705  /*
1706  * Fetch slot horizons while ProcArrayLock is held - the
1707  * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
1708  * the lock.
1709  */
1710  h->slot_xmin = procArray->replication_slot_xmin;
1712 
1713  for (int index = 0; index < arrayP->numProcs; index++)
1714  {
1715  int pgprocno = arrayP->pgprocnos[index];
1716  PGPROC *proc = &allProcs[pgprocno];
1717  int8 statusFlags = ProcGlobal->statusFlags[index];
1718  TransactionId xid;
1719  TransactionId xmin;
1720 
1721  /* Fetch xid just once - see GetNewTransactionId */
1722  xid = UINT32_ACCESS_ONCE(other_xids[index]);
1723  xmin = UINT32_ACCESS_ONCE(proc->xmin);
1724 
1725  /*
1726  * Consider both the transaction's Xmin, and its Xid.
1727  *
1728  * We must check both because a transaction might have an Xmin but not
1729  * (yet) an Xid; conversely, if it has an Xid, that could determine
1730  * some not-yet-set Xmin.
1731  */
1732  xmin = TransactionIdOlder(xmin, xid);
1733 
1734  /* if neither is set, this proc doesn't influence the horizon */
1735  if (!TransactionIdIsValid(xmin))
1736  continue;
1737 
1738  /*
1739  * Don't ignore any procs when determining which transactions might be
1740  * considered running. While slots should ensure logical decoding
1741  * backends are protected even without this check, it can't hurt to
1742  * include them here as well.
1743  */
1746 
1747  /*
1748  * Skip over backends either vacuuming (which is ok with rows being
1749  * removed, as long as pg_subtrans is not truncated) or doing logical
1750  * decoding (which manages xmin separately, check below).
1751  */
1752  if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
1753  continue;
1754 
1755  /* shared tables need to take backends in all databases into account */
1758 
1759  /*
1760  * Normally queries in other databases are ignored for anything but
1761  * the shared horizon. But in recovery we cannot compute an accurate
1762  * per-database horizon as all xids are managed via the
1763  * KnownAssignedXids machinery.
1764  *
1765  * Be careful to compute a pessimistic value when MyDatabaseId is not
1766  * set. If this is a backend in the process of starting up, we may not
1767  * use a "too aggressive" horizon (otherwise we could end up using it
1768  * to prune still needed data away). If the current backend never
1769  * connects to a database that is harmless, because
1770  * data_oldest_nonremovable will never be utilized.
1771  */
1772  if (in_recovery ||
1774  proc->databaseId == 0) /* always include WalSender */
1775  {
1778  }
1779  }
1780 
1781  /*
1782  * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
1783  * after lock is released.
1784  */
1785  if (in_recovery)
1786  kaxmin = KnownAssignedXidsGetOldestXmin();
1787 
1788  /*
1789  * No other information from shared state is needed, release the lock
1790  * immediately. The rest of the computations can be done without a lock.
1791  */
1792  LWLockRelease(ProcArrayLock);
1793 
1794  if (in_recovery)
1795  {
1802  /* temp relations cannot be accessed in recovery */
1803  }
1804  else
1805  {
1806  /*
1807  * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age.
1808  *
1809  * vacuum_defer_cleanup_age provides some additional "slop" for the
1810  * benefit of hot standby queries on standby servers. This is quick
1811  * and dirty, and perhaps not all that useful unless the primary has a
1812  * predictable transaction rate, but it offers some protection when
1813  * there's no walsender connection. Note that we are assuming
1814  * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1815  * so guc.c should limit it to no more than the xidStopLimit threshold
1816  * in varsup.c. Also note that we intentionally don't apply
1817  * vacuum_defer_cleanup_age on standby servers.
1818  */
1828  /* defer doesn't apply to temp relations */
1829  }
1830 
1831  /*
1832  * Check whether there are replication slots requiring an older xmin.
1833  */
1838 
1839  /*
1840  * The only difference between catalog / data horizons is that the slot's
1841  * catalog xmin is applied to the catalog one (so catalogs can be accessed
1842  * for logical decoding). Initialize with data horizon, and then back up
1843  * further if necessary. Have to back up the shared horizon as well, since
1844  * that also can contain catalogs.
1845  */
1849  h->slot_catalog_xmin);
1853  h->slot_catalog_xmin);
1854 
1855  /*
1856  * It's possible that slots / vacuum_defer_cleanup_age backed up the
1857  * horizons further than oldest_considered_running. Fix.
1858  */
1868 
1869  /*
1870  * shared horizons have to be at least as old as the oldest visible in
1871  * current db
1872  */
1877 
1878  /*
1879  * Horizons need to ensure that pg_subtrans access is still possible for
1880  * the relevant backends.
1881  */
1892  h->slot_xmin));
1895  h->slot_catalog_xmin));
1896 
1897  /* update approximate horizons with the computed horizons */
1899 }
1900 
1901 /*
1902  * Return the oldest XID for which deleted tuples must be preserved in the
1903  * passed table.
1904  *
1905  * If rel is not NULL the horizon may be considerably more recent than
1906  * otherwise (i.e. more tuples will be removable). In the NULL case a horizon
1907  * that is correct (but not optimal) for all relations will be returned.
1908  *
1909  * This is used by VACUUM to decide which deleted tuples must be preserved in
1910  * the passed in table.
1911  */
1914 {
1915  ComputeXidHorizonsResult horizons;
1916 
1917  ComputeXidHorizons(&horizons);
1918 
1919  /* select horizon appropriate for relation */
1920  if (rel == NULL || rel->rd_rel->relisshared)
1921  return horizons.shared_oldest_nonremovable;
1923  return horizons.catalog_oldest_nonremovable;
1924  else if (RELATION_IS_LOCAL(rel))
1925  return horizons.temp_oldest_nonremovable;
1926  else
1927  return horizons.data_oldest_nonremovable;
1928 }
1929 
1930 /*
1931  * Return the oldest transaction id any currently running backend might still
1932  * consider running. This should not be used for visibility / pruning
1933  * determinations (see GetOldestNonRemovableTransactionId()), but for
1934  * decisions like up to where pg_subtrans can be truncated.
      *
      * The value is the oldest_considered_running horizon freshly computed by
      * ComputeXidHorizons().
1935  */
1938 {
1939  ComputeXidHorizonsResult horizons;
1940 
1941  ComputeXidHorizons(&horizons);
1942 
1943  return horizons.oldest_considered_running;
1944 }
1945 
1946 /*
1947  * Return the visibility horizons for a hot standby feedback message.
      *
      * *xmin is set to the raw shared horizon (shared_oldest_nonremovable_raw)
      * and *catalog_xmin to the replication slots' catalog xmin, both taken from
      * a fresh ComputeXidHorizons() result.
1948  */
1949 void
1951 {
1952  ComputeXidHorizonsResult horizons;
1953 
1954  ComputeXidHorizons(&horizons);
1955 
1956  /*
1957  * Don't want to use shared_oldest_nonremovable here, as that contains the
1958  * effect of replication slot's catalog_xmin. We want to send a separate
1959  * feedback for the catalog horizon, so the primary can remove data table
1960  * contents more aggressively.
1961  */
1962  *xmin = horizons.shared_oldest_nonremovable_raw;
1963  *catalog_xmin = horizons.slot_catalog_xmin;
1964 }
1965 
1966 /*
1967  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1968  *
1969  * We have to export this for use by snapmgr.c.
      *
      * The value returned is procArray->maxProcs.
1970  */
1971 int
1973 {
1974  return procArray->maxProcs;
1975 }
1976 
1977 /*
1978  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1979  *
1980  * We have to export this for use by snapmgr.c.
      *
      * The value returned is the constant TOTAL_MAX_CACHED_SUBXIDS.
1981  */
1982 int
1984 {
1985  return TOTAL_MAX_CACHED_SUBXIDS;
1986 }
1987 
1988 /*
1989  * Initialize old_snapshot_threshold specific parts of a newly built snapshot.
      *
      * Sets snapshot->lsn and snapshot->whenTaken, either to dummy values or to
      * the current WAL insert position and snapshot timestamp.
1990  */
1991 static void
1993 {
1995  {
1996  /*
1997  * If not using "snapshot too old" feature, fill related fields with
1998  * dummy values that don't require any locking.
1999  */
2000  snapshot->lsn = InvalidXLogRecPtr;
2001  snapshot->whenTaken = 0;
2002  }
2003  else
2004  {
2005  /*
2006  * Capture the current time and WAL stream location in case this
2007  * snapshot becomes old enough to need to fall back on the special
2008  * "old snapshot" logic.
2009  */
2010  snapshot->lsn = GetXLogInsertRecPtr();
2011  snapshot->whenTaken = GetSnapshotCurrentTimestamp();
2012  MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
2013  }
2014 }
2015 
2016 /*
2017  * Helper function for GetSnapshotData() that checks if the bulk of the
2018  * visibility information in the snapshot is still valid. If so, it updates
2019  * the fields that need to change and returns true. Otherwise it returns
2020  * false.
2021  *
2022  * This very likely can be evolved to not need ProcArrayLock held (at very
2023  * least in the case we already hold a snapshot), but that's for another day.
2024  */
2025 static bool
2027 {
2028  uint64 curXactCompletionCount;
2029 
2030  Assert(LWLockHeldByMe(ProcArrayLock));
2031 
2032  if (unlikely(snapshot->snapXactCompletionCount == 0))
2033  return false;
2034 
2035  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2036  if (curXactCompletionCount != snapshot->snapXactCompletionCount)
2037  return false;
2038 
2039  /*
2040  * If the current xactCompletionCount is still the same as it was at the
2041  * time the snapshot was built, we can be sure that rebuilding the
2042  * contents of the snapshot the hard way would result in the same snapshot
2043  * contents:
2044  *
2045  * As explained in transam/README, the set of xids considered running by
2046  * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
2047  * contents only depend on transactions with xids and xactCompletionCount
2048  * is incremented whenever a transaction with an xid finishes (while
2049  * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
2050  * ensures we would detect if the snapshot would have changed.
2051  *
2052  * As the snapshot contents are the same as it was before, it is safe
2053  * to re-enter the snapshot's xmin into the PGPROC array. None of the rows
2054  * visible under the snapshot could already have been removed (that'd
2055  * require the set of running transactions to change) and it fulfills the
2056  * requirement that concurrent GetSnapshotData() calls yield the same
2057  * xmin.
2058  */
2060  MyProc->xmin = TransactionXmin = snapshot->xmin;
2061 
2062  RecentXmin = snapshot->xmin;
2064 
2065  snapshot->curcid = GetCurrentCommandId(false);
2066  snapshot->active_count = 0;
2067  snapshot->regd_count = 0;
2068  snapshot->copied = false;
2069 
2071 
2072  return true;
2073 }
2074 
2075 /*
2076  * GetSnapshotData -- returns information about running transactions.
2077  *
2078  * The returned snapshot includes xmin (lowest still-running xact ID),
2079  * xmax (highest completed xact ID + 1), and a list of running xact IDs
2080  * in the range xmin <= xid < xmax. It is used as follows:
2081  * All xact IDs < xmin are considered finished.
2082  * All xact IDs >= xmax are considered still running.
2083  * For an xact ID xmin <= xid < xmax, consult list to see whether
2084  * it is considered running or not.
2085  * This ensures that the set of transactions seen as "running" by the
2086  * current xact will not change after it takes the snapshot.
2087  *
2088  * All running top-level XIDs are included in the snapshot, except for lazy
2089  * VACUUM processes. We also try to include running subtransaction XIDs,
2090  * but since PGPROC has only a limited cache area for subxact XIDs, full
2091  * information may not be available. If we find any overflowed subxid arrays,
2092  * we have to mark the snapshot's subxid data as overflowed, and extra work
2093  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
2094  * in heapam_visibility.c).
2095  *
2096  * We also update the following backend-global variables:
2097  * TransactionXmin: the oldest xmin of any snapshot in use in the
2098  * current transaction (this is the same as MyProc->xmin).
2099  * RecentXmin: the xmin computed for the most recent snapshot. XIDs
2100  * older than this are known not running any more.
2101  *
2102  * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
2103  * for the benefit of the GlobalVisTest* family of functions.
2104  *
2105  * Note: this function should probably not be called with an argument that's
2106  * not statically allocated (see xip allocation below).
2107  */
2108 Snapshot
2110 {
2111  ProcArrayStruct *arrayP = procArray;
2112  TransactionId *other_xids = ProcGlobal->xids;
2113  TransactionId xmin;
2114  TransactionId xmax;
2115  size_t count = 0;
2116  int subcount = 0;
2117  bool suboverflowed = false;
2118  FullTransactionId latest_completed;
2119  TransactionId oldestxid;
2120  int mypgxactoff;
2121  TransactionId myxid;
2122  uint64 curXactCompletionCount;
2123 
2126 
2127  Assert(snapshot != NULL);
2128 
2129  /*
2130  * Allocating space for maxProcs xids is usually overkill; numProcs would
2131  * be sufficient. But it seems better to do the malloc while not holding
2132  * the lock, so we can't look at numProcs. Likewise, we allocate much
2133  * more subxip storage than is probably needed.
2134  *
2135  * This does open a possibility for avoiding repeated malloc/free: since
2136  * maxProcs does not change at runtime, we can simply reuse the previous
2137  * xip arrays if any. (This relies on the fact that all callers pass
2138  * static SnapshotData structs.)
2139  */
2140  if (snapshot->xip == NULL)
2141  {
2142  /*
2143  * First call for this snapshot. Snapshot is same size whether or not
2144  * we are in recovery, see later comments.
2145  */
2146  snapshot->xip = (TransactionId *)
2148  if (snapshot->xip == NULL)
2149  ereport(ERROR,
2150  (errcode(ERRCODE_OUT_OF_MEMORY),
2151  errmsg("out of memory")));
2152  Assert(snapshot->subxip == NULL);
2153  snapshot->subxip = (TransactionId *)
2155  if (snapshot->subxip == NULL)
2156  ereport(ERROR,
2157  (errcode(ERRCODE_OUT_OF_MEMORY),
2158  errmsg("out of memory")));
2159  }
2160 
2161  /*
2162  * It is sufficient to get shared lock on ProcArrayLock, even if we are
2163  * going to set MyProc->xmin.
2164  */
2165  LWLockAcquire(ProcArrayLock, LW_SHARED);
2166 
2167  if (GetSnapshotDataReuse(snapshot))
2168  {
2169  LWLockRelease(ProcArrayLock);
2170  return snapshot;
2171  }
2172 
2173  latest_completed = ShmemVariableCache->latestCompletedXid;
2174  mypgxactoff = MyProc->pgxactoff;
2175  myxid = other_xids[mypgxactoff];
2176  Assert(myxid == MyProc->xid);
2177 
2178  oldestxid = ShmemVariableCache->oldestXid;
2179  curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
2180 
2181  /* xmax is always latestCompletedXid + 1 */
2182  xmax = XidFromFullTransactionId(latest_completed);
2183  TransactionIdAdvance(xmax);
2185 
2186  /* initialize xmin calculation with xmax */
2187  xmin = xmax;
2188 
2189  /* take own xid into account, saves a check inside the loop */
2190  if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
2191  xmin = myxid;
2192 
2194 
2195  if (!snapshot->takenDuringRecovery)
2196  {
2197  size_t numProcs = arrayP->numProcs;
2198  TransactionId *xip = snapshot->xip;
2199  int *pgprocnos = arrayP->pgprocnos;
2200  XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
2201  uint8 *allStatusFlags = ProcGlobal->statusFlags;
2202 
2203  /*
2204  * First collect set of pgxactoff/xids that need to be included in the
2205  * snapshot.
2206  */
2207  for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
2208  {
2209  /* Fetch xid just once - see GetNewTransactionId */
2210  TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
2211  uint8 statusFlags;
2212 
2213  Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
2214 
2215  /*
2216  * If the transaction has no XID assigned, we can skip it; it
2217  * won't have sub-XIDs either.
2218  */
2219  if (likely(xid == InvalidTransactionId))
2220  continue;
2221 
2222  /*
2223  * We don't include our own XIDs (if any) in the snapshot. Our
2224  * own XID needs to be included in the xmin computation, but we
2225  * did so outside the loop.
2226  */
2227  if (pgxactoff == mypgxactoff)
2228  continue;
2229 
2230  /*
2231  * The only way we are able to get here with a non-normal xid
2232  * is during bootstrap - with this backend using
2233  * BootstrapTransactionId. But the above test should filter
2234  * that out.
2235  */
2237 
2238  /*
2239  * If the XID is >= xmax, we can skip it; such transactions will
2240  * be treated as running anyway (and any sub-XIDs will also be >=
2241  * xmax).
2242  */
2243  if (!NormalTransactionIdPrecedes(xid, xmax))
2244  continue;
2245 
2246  /*
2247  * Skip over backends doing logical decoding which manages xmin
2248  * separately (check below) and ones running LAZY VACUUM.
2249  */
2250  statusFlags = allStatusFlags[pgxactoff];
2251  if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
2252  continue;
2253 
2254  if (NormalTransactionIdPrecedes(xid, xmin))
2255  xmin = xid;
2256 
2257  /* Add XID to snapshot. */
2258  xip[count++] = xid;
2259 
2260  /*
2261  * Save subtransaction XIDs if possible (if we've already
2262  * overflowed, there's no point). Note that the subxact XIDs must
2263  * be later than their parent, so no need to check them against
2264  * xmin. We could filter against xmax, but it seems better not to
2265  * do that much work while holding the ProcArrayLock.
2266  *
2267  * The other backend can add more subxids concurrently, but cannot
2268  * remove any. Hence it's important to fetch nxids just once.
2269  * Should be safe to use memcpy, though. (We needn't worry about
2270  * missing any xids added concurrently, because they must postdate
2271  * xmax.)
2272  *
2273  * Again, our own XIDs are not included in the snapshot.
2274  */
2275  if (!suboverflowed)
2276  {
2277 
2278  if (subxidStates[pgxactoff].overflowed)
2279  suboverflowed = true;
2280  else
2281  {
2282  int nsubxids = subxidStates[pgxactoff].count;
2283 
2284  if (nsubxids > 0)
2285  {
2286  int pgprocno = pgprocnos[pgxactoff];
2287  PGPROC *proc = &allProcs[pgprocno];
2288 
2289  pg_read_barrier(); /* pairs with GetNewTransactionId */
2290 
2291  memcpy(snapshot->subxip + subcount,
2292  (void *) proc->subxids.xids,
2293  nsubxids * sizeof(TransactionId));
2294  subcount += nsubxids;
2295  }
2296  }
2297  }
2298  }
2299  }
2300  else
2301  {
2302  /*
2303  * We're in hot standby, so get XIDs from KnownAssignedXids.
2304  *
2305  * We store all xids directly into subxip[]. Here's why:
2306  *
2307  * In recovery we don't know which xids are top-level and which are
2308  * subxacts, a design choice that greatly simplifies xid processing.
2309  *
2310  * It seems like we would want to try to put xids into xip[] only, but
2311  * that is fairly small. We would either need to make that bigger or
2312  * to increase the rate at which we WAL-log xid assignment; neither is
2313  * an appealing choice.
2314  *
2315  * We could try to store xids into xip[] first and then into subxip[]
2316  * if there are too many xids. That only works if the snapshot doesn't
2317  * overflow because we do not search subxip[] in that case. A simpler
2318  * way is to just store all xids in the subxact array because this is
2319  * by far the bigger array. We just leave the xip array empty.
2320  *
2321  * Either way we need to change the way XidInMVCCSnapshot() works
2322  * depending upon when the snapshot was taken, or change normal
2323  * snapshot processing so it matches.
2324  *
2325  * Note: It is possible for recovery to end before we finish taking
2326  * the snapshot, and for newly assigned transaction ids to be added to
2327  * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
2328  * those newly added transaction ids would be filtered away, so we
2329  * need not be concerned about them.
2330  */
2331  subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
2332  xmax);
2333 
2334  if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
2335  suboverflowed = true;
2336  }
2337 
2338 
2339  /*
2340  * Fetch into local variable while ProcArrayLock is held - the
2341  * LWLockRelease below is a barrier, ensuring this happens inside the
2342  * lock.
2343  */
2344  replication_slot_xmin = procArray->replication_slot_xmin;
2345  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
2346 
2348  MyProc->xmin = TransactionXmin = xmin;
2349 
2350  LWLockRelease(ProcArrayLock);
2351 
2352  /* maintain state for GlobalVis* */
2353  {
2354  TransactionId def_vis_xid;
2355  TransactionId def_vis_xid_data;
2356  FullTransactionId def_vis_fxid;
2357  FullTransactionId def_vis_fxid_data;
2358  FullTransactionId oldestfxid;
2359 
2360  /*
2361  * Converting oldestXid is only safe when xid horizon cannot advance,
2362  * i.e. holding locks. While we don't hold the lock anymore, all the
2363  * necessary data has been gathered with lock held.
2364  */
2365  oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
2366 
2367  /* apply vacuum_defer_cleanup_age */
2368  def_vis_xid_data =
2370 
2371  /* Check whether there's a replication slot requiring an older xmin. */
2372  def_vis_xid_data =
2373  TransactionIdOlder(def_vis_xid_data, replication_slot_xmin);
2374 
2375  /*
2376  * Rows in non-shared, non-catalog tables possibly could be vacuumed
2377  * if older than this xid.
2378  */
2379  def_vis_xid = def_vis_xid_data;
2380 
2381  /*
2382  * Check whether there's a replication slot requiring an older catalog
2383  * xmin.
2384  */
2385  def_vis_xid =
2386  TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
2387 
2388  def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
2389  def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
2390 
2391  /*
2392  * Check if we can increase upper bound. As a previous
2393  * GlobalVisUpdate() might have computed more aggressive values, don't
2394  * overwrite them if so.
2395  */
2396  GlobalVisSharedRels.definitely_needed =
2397  FullTransactionIdNewer(def_vis_fxid,
2398  GlobalVisSharedRels.definitely_needed);
2399  GlobalVisCatalogRels.definitely_needed =
2400  FullTransactionIdNewer(def_vis_fxid,
2401  GlobalVisCatalogRels.definitely_needed);
2402  GlobalVisDataRels.definitely_needed =
2403  FullTransactionIdNewer(def_vis_fxid_data,
2404  GlobalVisDataRels.definitely_needed);
2405  /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
2406  if (TransactionIdIsNormal(myxid))
2407  GlobalVisTempRels.definitely_needed =
2408  FullXidRelativeTo(latest_completed, myxid);
2409  else
2410  {
2411  GlobalVisTempRels.definitely_needed = latest_completed;
2412  FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed);
2413  }
2414 
2415  /*
2416  * Check if we know that we can initialize or increase the lower
2417  * bound. Currently the only cheap way to do so is to use
2418  * ShmemVariableCache->oldestXid as input.
2419  *
2420  * We should definitely be able to do better. We could e.g. put a
2421  * global lower bound value into ShmemVariableCache.
2422  */
2423  GlobalVisSharedRels.maybe_needed =
2424  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
2425  oldestfxid);
2426  GlobalVisCatalogRels.maybe_needed =
2427  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
2428  oldestfxid);
2429  GlobalVisDataRels.maybe_needed =
2430  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
2431  oldestfxid);
2432  /* accurate value known */
2433  GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed;
2434  }
2435 
2436  RecentXmin = xmin;
2438 
2439  snapshot->xmin = xmin;
2440  snapshot->xmax = xmax;
2441  snapshot->xcnt = count;
2442  snapshot->subxcnt = subcount;
2443  snapshot->suboverflowed = suboverflowed;
2444  snapshot->snapXactCompletionCount = curXactCompletionCount;
2445 
2446  snapshot->curcid = GetCurrentCommandId(false);
2447 
2448  /*
2449  * This is a new snapshot, so set both refcounts to zero, and mark it as
2450  * not copied in persistent memory.
2451  */
2452  snapshot->active_count = 0;
2453  snapshot->regd_count = 0;
2454  snapshot->copied = false;
2455 
2457 
2458  return snapshot;
2459 }
2460 
2461 /*
2462  * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
2463  *
2464  * This is called when installing a snapshot imported from another
2465  * transaction. To ensure that OldestXmin doesn't go backwards, we must
2466  * check that the source transaction is still running, and we'd better do
2467  * that atomically with installing the new xmin.
2468  *
2469  * Returns true if successful, false if no suitable source xact is found (this includes sourcevxid being NULL).
2470  */
2471 bool
2473  VirtualTransactionId *sourcevxid)
2474 {
2475  bool result = false;
2476  ProcArrayStruct *arrayP = procArray;
2477  int index;
2478 
2480  if (!sourcevxid)
2481  return false;
2482 
2483  /* Get lock so source xact can't end while we're doing this */
2484  LWLockAcquire(ProcArrayLock, LW_SHARED);
2485 
2486  for (index = 0; index < arrayP->numProcs; index++)
2487  {
2488  int pgprocno = arrayP->pgprocnos[index];
2489  PGPROC *proc = &allProcs[pgprocno];
2490  int statusFlags = ProcGlobal->statusFlags[index];
2491  TransactionId xid;
2492 
2493  /* Ignore procs running LAZY VACUUM */
2494  if (statusFlags & PROC_IN_VACUUM)
2495  continue;
2496 
2497  /* We are only interested in the specific virtual transaction. */
2498  if (proc->backendId != sourcevxid->backendId)
2499  continue;
2500  if (proc->lxid != sourcevxid->localTransactionId)
2501  continue;
2502 
2503  /*
2504  * We check the transaction's database ID for paranoia's sake: if it's
2505  * in another DB then its xmin does not cover us. Caller should have
2506  * detected this already, so we just treat any funny cases as
2507  * "transaction not found".
2508  */
2509  if (proc->databaseId != MyDatabaseId)
2510  continue;
2511 
2512  /*
2513  * Likewise, let's just make real sure its xmin does cover us.
2514  */
2515  xid = UINT32_ACCESS_ONCE(proc->xmin);
2516  if (!TransactionIdIsNormal(xid) ||
2517  !TransactionIdPrecedesOrEquals(xid, xmin))
2518  continue;
2519 
2520  /*
2521  * We're good. Install the new xmin. As in GetSnapshotData, set
2522  * TransactionXmin too. (Note that because snapmgr.c called
2523  * GetSnapshotData first, we'll be overwriting a valid xmin here, so
2524  * we don't check that.)
2525  */
2526  MyProc->xmin = TransactionXmin = xmin;
2527 
2528  result = true;
2529  break;
2530  }
2531 
2532  LWLockRelease(ProcArrayLock);
2533 
2534  return result;
2535 }
2536 
2537 /*
2538  * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
2539  *
2540  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
2541  * PGPROC of the transaction from which we imported the snapshot, rather than
2542  * a virtual transaction ID.
2543  *
2544  * Returns true if successful, false if source xact is no longer running.
2545  */
2546 bool
2548 {
2549  bool result = false;
2550  TransactionId xid;
2551 
2553  Assert(proc != NULL);
2554 
2555  /* Get lock so source xact can't end while we're doing this */
2556  LWLockAcquire(ProcArrayLock, LW_SHARED);
2557 
2558  /*
2559  * Be certain that the referenced PGPROC has an advertised xmin which is
2560  * no later than the one we're installing, so that the system-wide xmin
2561  * can't go backwards. Also, make sure it's running in the same database,
2562  * so that the per-database xmin cannot go backwards.
2563  */
2564  xid = UINT32_ACCESS_ONCE(proc->xmin);
2565  if (proc->databaseId == MyDatabaseId &&
2566  TransactionIdIsNormal(xid) &&
2567  TransactionIdPrecedesOrEquals(xid, xmin))
2568  {
2569  MyProc->xmin = TransactionXmin = xmin;
2570  result = true;
2571  }
2572 
2573  LWLockRelease(ProcArrayLock);
2574 
2575  return result;
2576 }
2577 
2578 /*
2579  * GetRunningTransactionData -- returns information about running transactions.
2580  *
2581  * Similar to GetSnapshotData but returns more information. We include
2582  * all PGPROCs with an assigned TransactionId, even VACUUM processes and
2583  * prepared transactions.
2584  *
2585  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
2586  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
2587  * array until the caller has WAL-logged this snapshot, and releases the
2588  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
2589  * lock is released.
2590  *
2591  * The returned data structure is statically allocated; caller should not
2592  * modify it, and must not assume it is valid past the next call.
2593  *
2594  * This is never executed during recovery so there is no need to look at
2595  * KnownAssignedXids.
2596  *
2597  * Dummy PGPROCs from prepared transactions are included, meaning that this
2598  * may return entries with duplicated TransactionId values coming from
2599  * transactions that are finishing PREPARE. Nothing is done about duplicated
2600  * entries here, so as not to hold ProcArrayLock longer than necessary.
2601  *
2602  * We don't worry about updating other counters, we want to keep this as
2603  * simple as possible and leave GetSnapshotData() as the primary code for
2604  * that bookkeeping.
2605  *
2606  * Note that if any transaction has overflowed its cached subtransactions
2607  * then there is no real need to include any subtransactions.
2608  */
2611 {
2612  /* result workspace */
2613  static RunningTransactionsData CurrentRunningXactsData;
2614 
2615  ProcArrayStruct *arrayP = procArray;
2616  TransactionId *other_xids = ProcGlobal->xids;
2617  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
2618  TransactionId latestCompletedXid;
2619  TransactionId oldestRunningXid;
2620  TransactionId *xids;
2621  int index;
2622  int count;
2623  int subcount;
2624  bool suboverflowed;
2625 
2627 
2628  /*
2629  * Allocating space for maxProcs xids is usually overkill; numProcs would
2630  * be sufficient. But it seems better to do the malloc while not holding
2631  * the lock, so we can't look at numProcs. Likewise, we allocate much
2632  * more subxip storage than is probably needed.
2633  *
2634  * Should only be allocated in bgwriter, since only ever executed during
2635  * checkpoints.
2636  */
2637  if (CurrentRunningXacts->xids == NULL)
2638  {
2639  /*
2640  * First call
2641  */
2642  CurrentRunningXacts->xids = (TransactionId *)
2644  if (CurrentRunningXacts->xids == NULL)
2645  ereport(ERROR,
2646  (errcode(ERRCODE_OUT_OF_MEMORY),
2647  errmsg("out of memory")));
2648  }
2649 
2650  xids = CurrentRunningXacts->xids;
2651 
2652  count = subcount = 0;
2653  suboverflowed = false;
2654 
2655  /*
2656  * Ensure that no xids enter or leave the procarray while we obtain
2657  * snapshot.
2658  */
2659  LWLockAcquire(ProcArrayLock, LW_SHARED);
2660  LWLockAcquire(XidGenLock, LW_SHARED);
2661 
2662  latestCompletedXid =
2664  oldestRunningXid =
2666 
2667  /*
2668  * Spin over procArray collecting all xids
2669  */
2670  for (index = 0; index < arrayP->numProcs; index++)
2671  {
2672  TransactionId xid;
2673 
2674  /* Fetch xid just once - see GetNewTransactionId */
2675  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2676 
2677  /*
2678  * We don't need to store transactions that don't have a TransactionId
2679  * yet because they will not show as running on a standby server.
2680  */
2681  if (!TransactionIdIsValid(xid))
2682  continue;
2683 
2684  /*
2685  * Be careful not to exclude any xids before calculating the values of
2686  * oldestRunningXid and suboverflowed, since these are used to clean
2687  * up transaction information held on standbys.
2688  */
2689  if (TransactionIdPrecedes(xid, oldestRunningXid))
2690  oldestRunningXid = xid;
2691 
2692  if (ProcGlobal->subxidStates[index].overflowed)
2693  suboverflowed = true;
2694 
2695  /*
2696  * If we wished to exclude xids this would be the right place for it.
2697  * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2698  * but they do during truncation at the end when they get the lock and
2699  * truncate, so it is not much of a problem to include them if they
2700  * are seen and it is cleaner to include them.
2701  */
2702 
2703  xids[count++] = xid;
2704  }
2705 
2706  /*
2707  * Spin over procArray collecting all subxids, but only if there hasn't
2708  * been a suboverflow.
2709  */
2710  if (!suboverflowed)
2711  {
2712  XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
2713 
2714  for (index = 0; index < arrayP->numProcs; index++)
2715  {
2716  int pgprocno = arrayP->pgprocnos[index];
2717  PGPROC *proc = &allProcs[pgprocno];
2718  int nsubxids;
2719 
2720  /*
2721  * Save subtransaction XIDs. Other backends can't add or remove
2722  * entries while we're holding XidGenLock.
2723  */
2724  nsubxids = other_subxidstates[index].count;
2725  if (nsubxids > 0)
2726  {
2727  /* barrier not really required, as XidGenLock is held, but ... */
2728  pg_read_barrier(); /* pairs with GetNewTransactionId */
2729 
2730  memcpy(&xids[count], (void *) proc->subxids.xids,
2731  nsubxids * sizeof(TransactionId));
2732  count += nsubxids;
2733  subcount += nsubxids;
2734 
2735  /*
2736  * Top-level XID of a transaction is always less than any of
2737  * its subxids, so we don't need to check if any of the
2738  * subxids are smaller than oldestRunningXid
2739  */
2740  }
2741  }
2742  }
2743 
2744  /*
2745  * It's important *not* to include the limits set by slots here because
2746  * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2747  * were to be included here the initial value could never increase because
2748  * of a circular dependency where slots only increase their limits when
2749  * running xacts increases oldestRunningXid and running xacts only
2750  * increases if slots do.
2751  */
2752 
2753  CurrentRunningXacts->xcnt = count - subcount;
2754  CurrentRunningXacts->subxcnt = subcount;
2755  CurrentRunningXacts->subxid_overflow = suboverflowed;
2757  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2758  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2759 
2760  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2761  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2762  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2763 
2764  /* We don't release the locks here, the caller is responsible for that */
2765 
2766  return CurrentRunningXacts;
2767 }
2768 
2769 /*
2770  * GetOldestActiveTransactionId()
2771  *
2772  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2773  * all PGPROCs with an assigned TransactionId, even VACUUM processes.
2774  * We look at all databases, though there is no need to include WALSender
2775  * since this has no effect on hot standby conflicts.
2776  *
2777  * This is never executed during recovery so there is no need to look at
2778  * KnownAssignedXids.
2779  *
2780  * We don't worry about updating other counters, we want to keep this as
2781  * simple as possible and leave GetSnapshotData() as the primary code for
2782  * that bookkeeping.
2783  */
2786 {
2787  ProcArrayStruct *arrayP = procArray;
2788  TransactionId *other_xids = ProcGlobal->xids;
2789  TransactionId oldestRunningXid;
2790  int index;
2791 
2793 
2794  /*
2795  * Read nextXid, as the upper bound of what's still active.
2796  *
2797  * Reading a TransactionId is atomic, but we must grab the lock to make
2798  * sure that all XIDs < nextXid are already present in the proc array (or
2799  * have already completed), when we spin over it.
2800  */
2801  LWLockAcquire(XidGenLock, LW_SHARED);
2803  LWLockRelease(XidGenLock);
2804 
2805  /*
2806  * Spin over procArray looking for the oldest top-level xid.
2807  */
2808  LWLockAcquire(ProcArrayLock, LW_SHARED);
2809  for (index = 0; index < arrayP->numProcs; index++)
2810  {
2811  TransactionId xid;
2812 
2813  /* Fetch xid just once - see GetNewTransactionId */
2814  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2815 
2816  if (!TransactionIdIsNormal(xid))
2817  continue;
2818 
2819  if (TransactionIdPrecedes(xid, oldestRunningXid))
2820  oldestRunningXid = xid;
2821 
2822  /*
2823  * Top-level XID of a transaction is always less than any of its
2824  * subxids, so we don't need to check if any of the subxids are
2825  * smaller than oldestRunningXid
2826  */
2827  }
2828  LWLockRelease(ProcArrayLock);
2829 
2830  return oldestRunningXid;
2831 }
2832 
2833 /*
2834  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2835  *
2836  * Returns the oldest xid that we can guarantee not to have been affected by
2837  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2838  * transaction aborted. Note that the value can (and most of the time will) be
2839  * much more conservative than what really has been affected by vacuum, but we
2840  * currently don't have better data available.
2841  *
2842  * This is useful to initialize the cutoff xid after which a new changeset
2843  * extraction replication slot can start decoding changes.
2844  *
2845  * Must be called with ProcArrayLock held either shared or exclusively,
2846  * although most callers will want to use exclusive mode since it is expected
2847  * that the caller will immediately use the xid to peg the xmin horizon.
2848  */
2851 {
2852  ProcArrayStruct *arrayP = procArray;
2853  TransactionId oldestSafeXid;
2854  int index;
2855  bool recovery_in_progress = RecoveryInProgress();
2856 
2857  Assert(LWLockHeldByMe(ProcArrayLock));
2858 
2859  /*
2860  * Acquire XidGenLock, so no transactions can acquire an xid while we're
2861  * running. If no transaction with xid were running concurrently a new xid
2862  * could influence the RecentXmin et al.
2863  *
2864  * We initialize the computation to nextXid since that's guaranteed to be
2865  * a safe, albeit pessimal, value.
2866  */
2867  LWLockAcquire(XidGenLock, LW_SHARED);
2869 
2870  /*
2871  * If there's already a slot pegging the xmin horizon, we can start with
2872  * that value, it's guaranteed to be safe since it's computed by this
2873  * routine initially and has been enforced since. We can always use the
2874  * slot's general xmin horizon, but the catalog horizon is only usable
2875  * when only catalog data is going to be looked at.
2876  */
2877  if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2879  oldestSafeXid))
2880  oldestSafeXid = procArray->replication_slot_xmin;
2881 
2882  if (catalogOnly &&
2885  oldestSafeXid))
2886  oldestSafeXid = procArray->replication_slot_catalog_xmin;
2887 
2888  /*
2889  * If we're not in recovery, we walk over the procarray and collect the
2890  * lowest xid. Since we're called with ProcArrayLock held and have
2891  * acquired XidGenLock, no entries can vanish concurrently, since
2892  * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
2893  * with ProcArrayLock held.
2894  *
2895  * In recovery we can't lower the safe value besides what we've computed
2896  * above, so we'll have to wait a bit longer there. We unfortunately can
2897  * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2898  * machinery can miss values and return an older value than is safe.
2899  */
2900  if (!recovery_in_progress)
2901  {
2902  TransactionId *other_xids = ProcGlobal->xids;
2903 
2904  /*
2905  * Spin over procArray collecting min(ProcGlobal->xids[i])
2906  */
2907  for (index = 0; index < arrayP->numProcs; index++)
2908  {
2909  TransactionId xid;
2910 
2911  /* Fetch xid just once - see GetNewTransactionId */
2912  xid = UINT32_ACCESS_ONCE(other_xids[index]);
2913 
2914  if (!TransactionIdIsNormal(xid))
2915  continue;
2916 
2917  if (TransactionIdPrecedes(xid, oldestSafeXid))
2918  oldestSafeXid = xid;
2919  }
2920  }
2921 
2922  LWLockRelease(XidGenLock); /* only XidGenLock is released here; ProcArrayLock is left to the caller */
2923 
2924  return oldestSafeXid;
2925 }
2926 
2927 /*
2928  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2929  * delaying checkpoint because they have critical actions in progress.
2930  *
2931  * Constructs an array of VXIDs of transactions that are currently in commit
2932  * critical sections, as shown by having delayChkpt set in their PGPROC.
2933  *
2934  * Returns a palloc'd array that should be freed by the caller.
2935  * *nvxids is the number of valid entries.
2936  *
2937  * Note that because backends set or clear delayChkpt without holding any lock,
2938  * the result is somewhat indeterminate, but we don't really care. Even in
2939  * a multiprocessor with delayed writes to shared memory, it should be certain
2940  * that setting of delayChkpt will propagate to shared memory when the backend
2941  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2942  * it's already inserted its commit record. Whether it takes a little while
2943  * for clearing of delayChkpt to propagate is unimportant for correctness.
2944  */
2947 {
2948  VirtualTransactionId *vxids;
2949  ProcArrayStruct *arrayP = procArray;
2950  int count = 0;
2951  int index;
2952 
2953  /* allocate what's certainly enough result space (maxProcs entries), before taking the lock */
2954  vxids = (VirtualTransactionId *)
2955  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2956 
2957  LWLockAcquire(ProcArrayLock, LW_SHARED);
2958 
2959  for (index = 0; index < arrayP->numProcs; index++)
2960  {
2961  int pgprocno = arrayP->pgprocnos[index];
2962  PGPROC *proc = &allProcs[pgprocno];
2963 
2964  if (proc->delayChkpt)
2965  {
2966  VirtualTransactionId vxid;
2967 
2968  GET_VXID_FROM_PGPROC(vxid, *proc);
2969  if (VirtualTransactionIdIsValid(vxid))
2970  vxids[count++] = vxid;
2971  }
2972  }
2973 
2974  LWLockRelease(ProcArrayLock);
2975 
2976  *nvxids = count;
2977  return vxids;
2978 }
2979 
2980 /*
2981  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2982  *
2983  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2984  * of the specified VXIDs are still in critical sections of code.
2985  *
2986  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2987  * those numbers should be small enough for it not to be a problem.
2988  */
2989 bool
2991 {
2992  bool result = false;
2993  ProcArrayStruct *arrayP = procArray;
2994  int index;
2995 
2996  LWLockAcquire(ProcArrayLock, LW_SHARED);
2997 
2998  for (index = 0; index < arrayP->numProcs; index++)
2999  {
3000  int pgprocno = arrayP->pgprocnos[index];
3001  PGPROC *proc = &allProcs[pgprocno];
3002  VirtualTransactionId vxid;
3003 
3004  GET_VXID_FROM_PGPROC(vxid, *proc);
3005 
3006  if (proc->delayChkpt && VirtualTransactionIdIsValid(vxid))
3007  {
3008  int i;
3009 
3010  for (i = 0; i < nvxids; i++)
3011  {
3012  if (VirtualTransactionIdEquals(vxid, vxids[i]))
3013  {
3014  result = true;
3015  break;
3016  }
3017  }
3018  if (result)
3019  break;
3020  }
3021  }
3022 
3023  LWLockRelease(ProcArrayLock);
3024 
3025  return result;
3026 }
3027 
3028 /*
3029  * BackendPidGetProc -- get a backend's PGPROC given its PID
3030  *
3031  * Returns NULL if not found. Note that it is up to the caller to be
3032  * sure that the question remains meaningful for long enough for the
3033  * answer to be used ...
3034  */
3035 PGPROC *
3037 {
3038  PGPROC *result;
3039 
3040  if (pid == 0) /* never match dummy PGPROCs */
3041  return NULL;
3042 
3043  LWLockAcquire(ProcArrayLock, LW_SHARED);
3044 
3045  result = BackendPidGetProcWithLock(pid);
3046 
3047  LWLockRelease(ProcArrayLock);
3048 
3049  return result;
3050 }
3051 
3052 /*
3053  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
3054  *
3055  * Same as above, except caller must be holding ProcArrayLock. The found
3056  * entry, if any, can be assumed to be valid as long as the lock remains held.
3057  */
3058 PGPROC *
3060 {
3061  PGPROC *result = NULL;
3062  ProcArrayStruct *arrayP = procArray;
3063  int index;
3064 
3065  if (pid == 0) /* never match dummy PGPROCs */
3066  return NULL;
3067 
3068  for (index = 0; index < arrayP->numProcs; index++)
3069  {
3070  PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
3071 
3072  if (proc->pid == pid)
3073  {
3074  result = proc;
3075  break;
3076  }
3077  }
3078 
3079  return result;
3080 }
3081 
3082 /*
3083  * BackendXidGetPid -- get a backend's pid given its XID
3084  *
3085  * Returns 0 if not found or it's a prepared transaction. Note that
3086  * it is up to the caller to be sure that the question remains
3087  * meaningful for long enough for the answer to be used ...
3088  *
3089  * Only main transaction Ids are considered. This function is mainly
3090  * useful for determining what backend owns a lock.
3091  *
3092  * Beware that not every xact has an XID assigned. However, as long as you
3093  * only call this using an XID found on disk, you're safe.
3094  */
3095 int
3097 {
3098  int result = 0;
3099  ProcArrayStruct *arrayP = procArray;
3100  TransactionId *other_xids = ProcGlobal->xids;
3101  int index;
3102 
3103  if (xid == InvalidTransactionId) /* never match invalid xid */
3104  return 0;
3105 
3106  LWLockAcquire(ProcArrayLock, LW_SHARED);
3107 
3108  for (index = 0; index < arrayP->numProcs; index++)
3109  {
3110  int pgprocno = arrayP->pgprocnos[index];
3111  PGPROC *proc = &allProcs[pgprocno];
3112 
3113  if (other_xids[index] == xid)
3114  {
3115  result = proc->pid;
3116  break;
3117  }
3118  }
3119 
3120  LWLockRelease(ProcArrayLock);
3121 
3122  return result;
3123 }
3124 
3125 /*
3126  * IsBackendPid -- is a given pid a running backend
3127  *
3128  * This is not called by the backend, but is called by external modules.
3129  */
3130 bool
3132 {
3133  return (BackendPidGetProc(pid) != NULL);
3134 }
3135 
3136 
3137 /*
3138  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
3139  *
3140  * The array is palloc'd. The number of valid entries is returned into *nvxids.
3141  *
3142  * The arguments allow filtering the set of VXIDs returned. Our own process
3143  * is always skipped. In addition:
3144  * If limitXmin is not InvalidTransactionId, skip processes with
3145  * xmin > limitXmin.
3146  * If excludeXmin0 is true, skip processes with xmin = 0.
3147  * If allDbs is false, skip processes attached to other databases.
3148  * If excludeVacuum isn't zero, skip processes for which
3149  * (statusFlags & excludeVacuum) is not zero.
3150  *
3151  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
3152  * allow skipping backends whose oldest live snapshot is no older than
3153  * some snapshot we have. Since we examine the procarray with only shared
3154  * lock, there are race conditions: a backend could set its xmin just after
3155  * we look. Indeed, on multiprocessors with weak memory ordering, the
3156  * other backend could have set its xmin *before* we look. We know however
3157  * that such a backend must have held shared ProcArrayLock overlapping our
3158  * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
3159  * any snapshot the other backend is taking concurrently with our scan cannot
3160  * consider any transactions as still running that we think are committed
3161  * (since backends must hold ProcArrayLock exclusive to commit).
3162  */
3164 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
3165  bool allDbs, int excludeVacuum,
3166  int *nvxids)
3167 {
3168  VirtualTransactionId *vxids;
3169  ProcArrayStruct *arrayP = procArray;
3170  int count = 0;
3171  int index;
3172 
3173  /* allocate what's certainly enough result space */
3174  vxids = (VirtualTransactionId *)
3175  palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
3176 
3177  LWLockAcquire(ProcArrayLock, LW_SHARED);
3178 
3179  for (index = 0; index < arrayP->numProcs; index++)
3180  {
3181  int pgprocno = arrayP->pgprocnos[index];
3182  PGPROC *proc = &allProcs[pgprocno];
3183  uint8 statusFlags = ProcGlobal->statusFlags[index];
3184 
3185  if (proc == MyProc)
3186  continue;
3187 
3188  if (excludeVacuum & statusFlags)
3189  continue;
3190 
3191  if (allDbs || proc->databaseId == MyDatabaseId)
3192  {
3193  /* Fetch xmin just once - might change on us */
3194  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3195 
3196  if (excludeXmin0 && !TransactionIdIsValid(pxmin))
3197  continue;
3198 
3199  /*
3200  * InvalidTransactionId precedes all other XIDs, so a proc that
3201  * hasn't set xmin yet will not be rejected by this test.
3202  */
3203  if (!TransactionIdIsValid(limitXmin) ||
3204  TransactionIdPrecedesOrEquals(pxmin, limitXmin))
3205  {
3206  VirtualTransactionId vxid;
3207 
3208  GET_VXID_FROM_PGPROC(vxid, *proc);
3209  if (VirtualTransactionIdIsValid(vxid))
3210  vxids[count++] = vxid;
3211  }
3212  }
3213  }
3214 
3215  LWLockRelease(ProcArrayLock);
3216 
3217  *nvxids = count;
3218  return vxids;
3219 }
3220 
3221 /*
3222  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
3223  *
3224  * Usage is limited to conflict resolution during recovery on standby servers.
3225  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
3226  * in cases where we cannot accurately determine a value for latestRemovedXid.
3227  *
3228  * If limitXmin is InvalidTransactionId then we want to kill everybody,
3229  * so we're not worried if they have a snapshot or not, nor does it really
3230  * matter what type of lock we hold.
3231  *
3232  * All callers that are checking xmins always now supply a valid and useful
3233  * value for limitXmin. The limitXmin is always lower than the lowest
3234  * numbered KnownAssignedXid that is not already a FATAL error. This is
3235  * because we only care about cleanup records that are cleaning up tuple
3236  * versions from committed transactions. In that case they will only occur
3237  * at the point where the record is less than the lowest running xid. That
3238  * allows us to say that if any backend takes a snapshot concurrently with
3239  * us then the conflict assessment made here would never include the snapshot
3240  * that is being derived. So we take LW_SHARED on the ProcArray and allow
3241  * concurrent snapshots when limitXmin is valid. We might think about adding
3242  * Assert(limitXmin < lowest(KnownAssignedXids))
3243  * but that would not be true in the case of FATAL errors lagging in array,
3244  * but we already know those are bogus anyway, so we skip that test.
3245  *
3246  * If dbOid is valid we skip backends attached to other databases.
3247  *
3248  * Be careful to *not* pfree the result from this function. We reuse
3249  * this array sufficiently often that we use malloc for the result.
3250  */
3253 {
3254  static VirtualTransactionId *vxids;
3255  ProcArrayStruct *arrayP = procArray;
3256  int count = 0;
3257  int index;
3258 
3259  /*
3260  * If first time through, get workspace to remember main XIDs in. We
3261  * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
3262  * result space, remembering room for a terminator.
3263  */
3264  if (vxids == NULL)
3265  {
3266  vxids = (VirtualTransactionId *)
3267  malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
3268  if (vxids == NULL)
3269  ereport(ERROR,
3270  (errcode(ERRCODE_OUT_OF_MEMORY),
3271  errmsg("out of memory")));
3272  }
3273 
3274  LWLockAcquire(ProcArrayLock, LW_SHARED);
3275 
3276  for (index = 0; index < arrayP->numProcs; index++)
3277  {
3278  int pgprocno = arrayP->pgprocnos[index];
3279  PGPROC *proc = &allProcs[pgprocno];
3280 
3281  /* Exclude prepared transactions */
3282  if (proc->pid == 0)
3283  continue;
3284 
3285  if (!OidIsValid(dbOid) ||
3286  proc->databaseId == dbOid)
3287  {
3288  /* Fetch xmin just once - can't change on us, but good coding */
3289  TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3290 
3291  /*
3292  * We ignore an invalid pxmin because this means that backend has
3293  * no snapshot currently. We hold a Share lock to avoid contention
3294  * with users taking snapshots. That is not a problem because the
3295  * current xmin is always at least one higher than the latest
3296  * removed xid, so any new snapshot would never conflict with the
3297  * test here.
3298  */
3299  if (!TransactionIdIsValid(limitXmin) ||
3300  (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
3301  {
3302  VirtualTransactionId vxid;
3303 
3304  GET_VXID_FROM_PGPROC(vxid, *proc);
3305  if (VirtualTransactionIdIsValid(vxid))
3306  vxids[count++] = vxid;
3307  }
3308  }
3309  }
3310 
3311  LWLockRelease(ProcArrayLock);
3312 
3313  /* add the terminator */
3314  vxids[count].backendId = InvalidBackendId;
3316 
3317  return vxids;
3318 }
3319 
3320 /*
3321  * CancelVirtualTransaction - used in recovery conflict processing
3322  *
3323  * Returns pid of the process signaled, or 0 if not found.
3324  */
3325 pid_t
3327 {
3328  return SignalVirtualTransaction(vxid, sigmode, true);
3329 }
3330 
3331 pid_t
3333  bool conflictPending)
3334 {
3335  ProcArrayStruct *arrayP = procArray;
3336  int index;
3337  pid_t pid = 0;
3338 
3339  LWLockAcquire(ProcArrayLock, LW_SHARED);
3340 
3341  for (index = 0; index < arrayP->numProcs; index++)
3342  {
3343  int pgprocno = arrayP->pgprocnos[index];
3344  PGPROC *proc = &allProcs[pgprocno];
3345  VirtualTransactionId procvxid;
3346 
3347  GET_VXID_FROM_PGPROC(procvxid, *proc);
3348 
3349  if (procvxid.backendId == vxid.backendId &&
3350  procvxid.localTransactionId == vxid.localTransactionId)
3351  {
3352  proc->recoveryConflictPending = conflictPending;
3353  pid = proc->pid;
3354  if (pid != 0)
3355  {
3356  /*
3357  * Kill the pid if it's still here. If not, that's what we
3358  * wanted so ignore any errors.
3359  */
3360  (void) SendProcSignal(pid, sigmode, vxid.backendId);
3361  }
3362  break;
3363  }
3364  }
3365 
3366  LWLockRelease(ProcArrayLock);
3367 
3368  return pid;
3369 }
3370 
3371 /*
3372  * MinimumActiveBackends --- count backends (other than myself) that are
3373  * in active transactions. Return true if the count exceeds the
3374  * minimum threshold passed. This is used as a heuristic to decide if
3375  * a pre-XLOG-flush delay is worthwhile during commit.
3376  *
3377  * Do not count backends that are blocked waiting for locks, since they are
3378  * not going to get to run until someone else commits.
3379  */
3380 bool
3382 {
3383  ProcArrayStruct *arrayP = procArray;
3384  int count = 0;
3385  int index;
3386 
3387  /* Quick short-circuit if no minimum is specified */
3388  if (min == 0)
3389  return true;
3390 
3391  /*
3392  * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
3393  * bogus, but since we are only testing fields for zero or nonzero, it
3394  * should be OK. The result is only used for heuristic purposes anyway...
3395  */
3396  for (index = 0; index < arrayP->numProcs; index++)
3397  {
3398  int pgprocno = arrayP->pgprocnos[index];
3399  PGPROC *proc = &allProcs[pgprocno];
3400 
3401  /*
3402  * Since we're not holding a lock, need to be prepared to deal with
3403  * garbage, as someone could have incremented numProcs but not yet
3404  * filled the structure.
3405  *
3406  * If someone just decremented numProcs, 'proc' could also point to a
3407  * PGPROC entry that's no longer in the array. It still points to a
3408  * PGPROC struct, though, because freed PGPROC entries just go to the
3409  * free list and are recycled. Its contents are nonsense in that case,
3410  * but that's acceptable for this function.
3411  */
3412  if (pgprocno == -1)
3413  continue; /* do not count deleted entries */
3414  if (proc == MyProc)
3415  continue; /* do not count myself */
3416  if (proc->xid == InvalidTransactionId)
3417  continue; /* do not count if no XID assigned */
3418  if (proc->pid == 0)
3419  continue; /* do not count prepared xacts */
3420  if (proc->waitLock != NULL)
3421  continue; /* do not count if blocked on a lock */
3422  count++;
3423  if (count >= min)
3424  break;
3425  }
3426 
3427  return count >= min;
3428 }
3429 
3430 /*
3431  * CountDBBackends --- count backends that are using specified database
3432  */
3433 int
3435 {
3436  ProcArrayStruct *arrayP = procArray;
3437  int count = 0;
3438  int index;
3439 
3440  LWLockAcquire(ProcArrayLock, LW_SHARED);
3441 
3442  for (index = 0; index < arrayP->numProcs; index++)
3443  {
3444  int pgprocno = arrayP->pgprocnos[index];
3445  PGPROC *proc = &allProcs[pgprocno];
3446 
3447  if (proc->pid == 0)
3448  continue; /* do not count prepared xacts */
3449  if (!OidIsValid(databaseid) ||
3450  proc->databaseId == databaseid)
3451  count++;
3452  }
3453 
3454  LWLockRelease(ProcArrayLock);
3455 
3456  return count;
3457 }
3458 
3459 /*
3460  * CountDBConnections --- counts database backends ignoring any background
3461  * worker processes
3462  */
3463 int
3465 {
3466  ProcArrayStruct *arrayP = procArray;
3467  int count = 0;
3468  int index;
3469 
3470  LWLockAcquire(ProcArrayLock, LW_SHARED);
3471 
3472  for (index = 0; index < arrayP->numProcs; index++)
3473  {
3474  int pgprocno = arrayP->pgprocnos[index];
3475  PGPROC *proc = &allProcs[pgprocno];
3476 
3477  if (proc->pid == 0)
3478  continue; /* do not count prepared xacts */
3479  if (proc->isBackgroundWorker)
3480  continue; /* do not count background workers */
3481  if (!OidIsValid(databaseid) ||
3482  proc->databaseId == databaseid)
3483  count++;
3484  }
3485 
3486  LWLockRelease(ProcArrayLock);
3487 
3488  return count;
3489 }
3490 
3491 /*
3492  * CancelDBBackends --- cancel backends that are using specified database
3493  */
3494 void
3495 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
3496 {
3497  ProcArrayStruct *arrayP = procArray;
3498  int index;
3499 
3500  /* tell all backends to die */
3501  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3502 
3503  for (index = 0; index < arrayP->numProcs; index++)
3504  {
3505  int pgprocno = arrayP->pgprocnos[index];
3506  PGPROC *proc = &allProcs[pgprocno];
3507 
3508  if (databaseid == InvalidOid || proc->databaseId == databaseid)
3509  {
3510  VirtualTransactionId procvxid;
3511  pid_t pid;
3512 
3513  GET_VXID_FROM_PGPROC(procvxid, *proc);
3514 
3515  proc->recoveryConflictPending = conflictPending;
3516  pid = proc->pid;
3517  if (pid != 0)
3518  {
3519  /*
3520  * Kill the pid if it's still here. If not, that's what we
3521  * wanted so ignore any errors.
3522  */
3523  (void) SendProcSignal(pid, sigmode, procvxid.backendId);
3524  }
3525  }
3526  }
3527 
3528  LWLockRelease(ProcArrayLock);
3529 }
3530 
3531 /*
3532  * CountUserBackends --- count backends that are used by specified user
3533  */
3534 int
3536 {
3537  ProcArrayStruct *arrayP = procArray;
3538  int count = 0;
3539  int index;
3540 
3541  LWLockAcquire(ProcArrayLock, LW_SHARED);
3542 
3543  for (index = 0; index < arrayP->numProcs; index++)
3544  {
3545  int pgprocno = arrayP->pgprocnos[index];
3546  PGPROC *proc = &allProcs[pgprocno];
3547 
3548  if (proc->pid == 0)
3549  continue; /* do not count prepared xacts */
3550  if (proc->isBackgroundWorker)
3551  continue; /* do not count background workers */
3552  if (proc->roleId == roleid)
3553  count++;
3554  }
3555 
3556  LWLockRelease(ProcArrayLock);
3557 
3558  return count;
3559 }
3560 
3561 /*
3562  * CountOtherDBBackends -- check for other backends running in the given DB
3563  *
3564  * If there are other backends in the DB, we will wait a maximum of 5 seconds
3565  * for them to exit. Autovacuum backends are encouraged to exit early by
3566  * sending them SIGTERM, but normal user backends are just waited for.
3567  *
3568  * The current backend is always ignored; it is caller's responsibility to
3569  * check whether the current backend uses the given DB, if it's important.
3570  *
3571  * Returns true if there are (still) other backends in the DB, false if not.
3572  * Also, *nbackends and *nprepared are set to the number of other backends
3573  * and prepared transactions in the DB, respectively.
3574  *
3575  * This function is used to interlock DROP DATABASE and related commands
3576  * against there being any active backends in the target DB --- dropping the
3577  * DB while active backends remain would be a Bad Thing. Note that we cannot
3578  * detect here the possibility of a newly-started backend that is trying to
3579  * connect to the doomed database, so additional interlocking is needed during
3580  * backend startup. The caller should normally hold an exclusive lock on the
3581  * target DB before calling this, which is one reason we mustn't wait
3582  * indefinitely.
3583  */
3584 bool
3585 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
3586 {
3587  ProcArrayStruct *arrayP = procArray;
3588 
3589 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
3590  int autovac_pids[MAXAUTOVACPIDS];
3591  int tries;
3592 
3593  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
3594  for (tries = 0; tries < 50; tries++)
3595  {
3596  int nautovacs = 0;
3597  bool found = false;
3598  int index;
3599 
3601 
3602  *nbackends = *nprepared = 0;
3603 
3604  LWLockAcquire(ProcArrayLock, LW_SHARED);
3605 
3606  for (index = 0; index < arrayP->numProcs; index++)
3607  {
3608  int pgprocno = arrayP->pgprocnos[index];
3609  PGPROC *proc = &allProcs[pgprocno];
3610  uint8 statusFlags = ProcGlobal->statusFlags[index];
3611 
3612  if (proc->databaseId != databaseId)
3613  continue;
3614  if (proc == MyProc)
3615  continue;
3616 
3617  found = true;
3618 
3619  if (proc->pid == 0)
3620  (*nprepared)++;
3621  else
3622  {
3623  (*nbackends)++;
3624  if ((statusFlags & PROC_IS_AUTOVACUUM) &&
3625  nautovacs < MAXAUTOVACPIDS)
3626  autovac_pids[nautovacs++] = proc->pid;
3627  }
3628  }
3629 
3630  LWLockRelease(ProcArrayLock);
3631 
3632  if (!found)
3633  return false; /* no conflicting backends, so done */
3634 
3635  /*
3636  * Send SIGTERM to any conflicting autovacuums before sleeping. We
3637  * postpone this step until after the loop because we don't want to
3638  * hold ProcArrayLock while issuing kill(). We have no idea what might
3639  * block kill() inside the kernel...
3640  */
3641  for (index = 0; index < nautovacs; index++)
3642  (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3643 
3644  /* sleep, then try again */
3645  pg_usleep(100 * 1000L); /* 100ms */
3646  }
3647 
3648  return true; /* timed out, still conflicts */
3649 }
3650 
3651 /*
3652  * Terminate existing connections to the specified database. This routine
3653  * is used by the DROP DATABASE command when user has asked to forcefully
3654  * drop the database.
3655  *
3656  * The current backend is always ignored; it is caller's responsibility to
3657  * check whether the current backend uses the given DB, if it's important.
3658  *
3659  * No connection is terminated if even one prepared transaction is using
3660  * the target database; an error is raised instead.
3661  */
3662 void
3664 {
3665  ProcArrayStruct *arrayP = procArray;
3666  List *pids = NIL;
3667  int nprepared = 0;
3668  int i;
3669 
3670  LWLockAcquire(ProcArrayLock, LW_SHARED);
3671 
3672  for (i = 0; i < procArray->numProcs; i++)
3673  {
3674  int pgprocno = arrayP->pgprocnos[i];
3675  PGPROC *proc = &allProcs[pgprocno];
3676 
3677  if (proc->databaseId != databaseId)
3678  continue;
3679  if (proc == MyProc)
3680  continue;
3681 
3682  if (proc->pid != 0)
3683  pids = lappend_int(pids, proc->pid);
3684  else
3685  nprepared++;
3686  }
3687 
3688  LWLockRelease(ProcArrayLock);
3689 
3690  if (nprepared > 0)
3691  ereport(ERROR,
3692  (errcode(ERRCODE_OBJECT_IN_USE),
3693  errmsg("database \"%s\" is being used by prepared transactions",
3694  get_database_name(databaseId)),
3695  errdetail_plural("There is %d prepared transaction using the database.",
3696  "There are %d prepared transactions using the database.",
3697  nprepared,
3698  nprepared)));
3699 
3700  if (pids)
3701  {
3702  ListCell *lc;
3703 
3704  /*
3705  * Check whether we have the necessary rights to terminate other
3706  * sessions. We don't terminate any session until we ensure that we
3707  * have rights on all the sessions to be terminated. These checks are
3708  * the same as we do in pg_terminate_backend.
3709  *
3710  * Unlike pg_terminate_backend, we don't raise warnings such as "PID %d
3711  * is not a PostgreSQL server process" here, because a session that has
3712  * already finished is not a problem for us.
3713  */
3714  foreach(lc, pids)
3715  {
3716  int pid = lfirst_int(lc);
3717  PGPROC *proc = BackendPidGetProc(pid);
3718 
3719  if (proc != NULL)
3720  {
3721  /* Only allow superusers to signal superuser-owned backends. */
3722  if (superuser_arg(proc->roleId) && !superuser())
3723  ereport(ERROR,
3724  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3725  errmsg("must be a superuser to terminate superuser process")));
3726 
3727  /* Users can signal backends they have role membership in. */
3728  if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3729  !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
3730  ereport(ERROR,
3731  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3732  errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
3733  }
3734  }
3735 
3736  /*
3737  * There's a race condition here: once we release the ProcArrayLock,
3738  * it's possible for the session to exit before we issue kill. That
3739  * race condition possibility seems too unlikely to worry about. See
3740  * pg_signal_backend.
3741  */
3742  foreach(lc, pids)
3743  {
3744  int pid = lfirst_int(lc);
3745  PGPROC *proc = BackendPidGetProc(pid);
3746 
3747  if (proc != NULL)
3748  {
3749  /*
3750  * If we have setsid(), signal the backend's whole process
3751  * group
3752  */
3753 #ifdef HAVE_SETSID
3754  (void) kill(-pid, SIGTERM);
3755 #else
3756  (void) kill(pid, SIGTERM);
3757 #endif
3758  }
3759  }
3760  }
3761 }
3762 
3763 /*
3764  * ProcArraySetReplicationSlotXmin
3765  *
3766  * Install limits to future computations of the xmin horizon to prevent vacuum
3767  * and HOT pruning from removing affected rows still needed by clients with
3768  * replication slots.
3769  */
3770 void
3772  bool already_locked)
3773 {
3774  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3775 
3776  if (!already_locked)
3777  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3778 
3779  procArray->replication_slot_xmin = xmin;
3780  procArray->replication_slot_catalog_xmin = catalog_xmin;
3781 
3782  if (!already_locked)
3783  LWLockRelease(ProcArrayLock);
3784 }
3785 
3786 /*
3787  * ProcArrayGetReplicationSlotXmin
3788  *
3789  * Return the current slot xmin limits. That's useful to be able to remove
3790  * data that's older than those limits.
3791  */
3792 void
3794  TransactionId *catalog_xmin)
3795 {
3796  LWLockAcquire(ProcArrayLock, LW_SHARED);
3797 
3798  if (xmin != NULL)
3799  *xmin = procArray->replication_slot_xmin;
3800 
3801  if (catalog_xmin != NULL)
3802  *catalog_xmin = procArray->replication_slot_catalog_xmin;
3803 
3804  LWLockRelease(ProcArrayLock);
3805 }
3806 
3807 /*
3808  * XidCacheRemoveRunningXids
3809  *
3810  * Remove a bunch of TransactionIds from the list of known-running
3811  * subtransactions for my backend. Both the specified xid and those in
3812  * the xids[] array (of length nxids) are removed from the subxids cache.
3813  * latestXid must be the latest XID among the group.
3814  */
3815 void
3817  int nxids, const TransactionId *xids,
3818  TransactionId latestXid)
3819 {
3820  int i,
3821  j;
3822  XidCacheStatus *mysubxidstat;
3823 
3825 
3826  /*
3827  * We must hold ProcArrayLock exclusively in order to remove transactions
3828  * from the PGPROC array. (See src/backend/access/transam/README.) It's
3829  * possible this could be relaxed since we know this routine is only used
3830  * to abort subtransactions, but pending closer analysis we'd best be
3831  * conservative.
3832  *
3833  * Note that we do not have to be careful about memory ordering of our own
3834  * reads wrt. GetNewTransactionId() here - only this process can modify
3835  * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
3836  * careful about our own writes being well ordered.
3837  */
3838  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3839 
3840  mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
3841 
3842  /*
3843  * Under normal circumstances xid and xids[] will be in increasing order,
3844  * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3845  * behavior when removing a lot of xids.
3846  */
3847  for (i = nxids - 1; i >= 0; i--)
3848  {
3849  TransactionId anxid = xids[i];
3850 
3851  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3852  {
3853  if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3854  {
3856  pg_write_barrier();
3857  mysubxidstat->count--;
3859  break;
3860  }
3861  }
3862 
3863  /*
3864  * Ordinarily we should have found it, unless the cache has
3865  * overflowed. However it's also possible for this routine to be
3866  * invoked multiple times for the same subtransaction, in case of an
3867  * error during AbortSubTransaction. So instead of Assert, emit a
3868  * debug warning.
3869  */
3870  if (j < 0 && !MyProc->subxidStatus.overflowed)
3871  elog(WARNING, "did not find subXID %u in MyProc", anxid);
3872  }
3873 
3874  for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
3875  {
3876  if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3877  {
3879  pg_write_barrier();
3880  mysubxidstat->count--;
3882  break;
3883  }
3884  }
3885  /* Ordinarily we should have found it, unless the cache has overflowed */
3886  if (j < 0 && !MyProc->subxidStatus.overflowed)
3887  elog(WARNING, "did not find subXID %u in MyProc", xid);
3888 
3889  /* Also advance global latestCompletedXid while holding the lock */
3890  MaintainLatestCompletedXid(latestXid);
3891 
3892  LWLockRelease(ProcArrayLock);
3893 }
3894 
3895 #ifdef XIDCACHE_DEBUG
3896 
3897 /*
3898  * Print stats about effectiveness of XID cache
3899  */
3900 static void
3901 DisplayXidCache(void)
3902 {
3903  fprintf(stderr,
3904  "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
3905  xc_by_recent_xmin,
3906  xc_by_known_xact,
3907  xc_by_my_xact,
3908  xc_by_latest_xid,
3909  xc_by_main_xid,
3910  xc_by_child_xid,
3911  xc_by_known_assigned,
3912  xc_no_overflow,
3913  xc_slow_answer);
3914 }
3915 #endif /* XIDCACHE_DEBUG */
3916 
3917 /*
3918  * If rel != NULL, return test state appropriate for relation, otherwise
3919  * return state usable for all relations. The latter may consider XIDs as
3920  * not-yet-visible-to-everyone that a state for a specific relation would
3921  * already consider visible-to-everyone.
3922  *
3923  * This needs to be called while a snapshot is active or registered, otherwise
3924  * there are wraparound and other dangers.
3925  *
3926  * See comment for GlobalVisState for details.
3927  */
3930 {
3931  bool need_shared;
3932  bool need_catalog;
3934 
3935  /* XXX: we should assert that a snapshot is pushed or registered */
3936  Assert(RecentXmin);
3937 
3938  if (!rel)
3939  need_shared = need_catalog = true;
3940  else
3941  {
3942  /*
3943  * Other kinds currently don't contain xids, nor always the necessary
3944  * logical decoding markers.
3945  */
3946  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
3947  rel->rd_rel->relkind == RELKIND_MATVIEW ||
3948  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
3949 
3950  need_shared = rel->rd_rel->relisshared || RecoveryInProgress();
3951  need_catalog = IsCatalogRelation(rel) || RelationIsAccessibleInLogicalDecoding(rel);
3952  }
3953 
3954  if (need_shared)
3955  state = &GlobalVisSharedRels;
3956  else if (need_catalog)
3957  state = &GlobalVisCatalogRels;
3958  else if (RELATION_IS_LOCAL(rel))
3959  state = &GlobalVisTempRels;
3960  else
3961  state = &GlobalVisDataRels;
3962 
3965 
3966  return state;
3967 }
3968 
3969 /*
3970  * Return true if it's worth updating the accurate maybe_needed boundary.
3971  *
3972  * As it is somewhat expensive to determine xmin horizons, we don't want to
3973  * repeatedly do so when there is a low likelihood of it being beneficial.
3974  *
3975  * The current heuristic is that we update only if RecentXmin has changed
3976  * since the last update. If the oldest currently running transaction has not
3977  * finished, it is unlikely that recomputing the horizon would be useful.
3978  */
3979 static bool
3981 {
3982  /* hasn't been updated yet */
3984  return true;
3985 
3986  /*
3987  * If the maybe_needed/definitely_needed boundaries are the same, it's
3988  * unlikely to be beneficial to refresh boundaries.
3989  */
3991  state->definitely_needed))
3992  return false;
3993 
3994  /* does the last snapshot built have a different xmin? */
3996 }
3997 
3998 static void
4000 {
4001  GlobalVisSharedRels.maybe_needed =
4003  horizons->shared_oldest_nonremovable);
4004  GlobalVisCatalogRels.maybe_needed =
4006  horizons->catalog_oldest_nonremovable);
4007  GlobalVisDataRels.maybe_needed =
4009  horizons->data_oldest_nonremovable);
4010  GlobalVisTempRels.maybe_needed =
4012  horizons->temp_oldest_nonremovable);
4013 
4014  /*
4015  * In longer running transactions it's possible that transactions we
4016  * previously needed to treat as running aren't around anymore. So update
4017  * definitely_needed to not be earlier than maybe_needed.
4018  */
4019  GlobalVisSharedRels.definitely_needed =
4020  FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
4021  GlobalVisSharedRels.definitely_needed);
4022  GlobalVisCatalogRels.definitely_needed =
4023  FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
4024  GlobalVisCatalogRels.definitely_needed);
4025  GlobalVisDataRels.definitely_needed =
4026  FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
4027  GlobalVisDataRels.definitely_needed);
4028  GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed;
4029 
4031 }
4032 
4033 /*
4034  * Update boundaries in GlobalVis{Shared,Catalog,Data,Temp}Rels
4035  * using ComputeXidHorizons().
4036  */
4037 static void
4039 {
4040  ComputeXidHorizonsResult horizons;
4041 
4042  /* updates the horizons as a side-effect */
4043  ComputeXidHorizons(&horizons);
4044 }
4045 
4046 /*
4047  * Return true if no snapshot still considers fxid to be running.
4048  *
4049  * The state passed needs to have been initialized for the relation fxid is
4050  * from (NULL is also OK), otherwise the result may not be correct.
4051  *
4052  * See comment for GlobalVisState for details.
4053  */
4054 bool
4056  FullTransactionId fxid)
4057 {
4058  /*
4059  * If fxid is older than maybe_needed bound, it definitely is visible to
4060  * everyone.
4061  */
4062  if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
4063  return true;
4064 
4065  /*
4066  * If fxid is >= definitely_needed bound, it is very likely to still be
4067  * considered running.
4068  */
4070  return false;
4071 
4072  /*
4073  * fxid is between maybe_needed and definitely_needed, i.e. there might or
4074  * might not exist a snapshot considering fxid running. If it makes sense,
4075  * update boundaries and recheck.
4076  */
4077  if (GlobalVisTestShouldUpdate(state))
4078  {
4079  GlobalVisUpdate();
4080 
4082 
4083  return FullTransactionIdPrecedes(fxid, state->maybe_needed);
4084  }
4085  else
4086  return false;
4087 }
4088 
4089 /*
4090  * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
4091  *
4092  * It is crucial that this only gets called for xids from a source that
4093  * protects against xid wraparounds (e.g. from a table and thus protected by
4094  * relfrozenxid).
4095  */
4096 bool
4098 {
4099  FullTransactionId fxid;
4100 
4101  /*
4102  * Convert 32 bit argument to FullTransactionId. We can do so safely
4103  * because we know the xid has to, at the very least, be between
4104  * [oldestXid, nextFullXid), i.e. within 2 billion of xid. To avoid taking
4105  * a lock to determine either, we can just compare with
4106  * state->definitely_needed, which was based on those values at the time
4107  * the current snapshot was built.
4108  */
4109  fxid = FullXidRelativeTo(state->definitely_needed, xid);
4110 
4111  return GlobalVisTestIsRemovableFullXid(state, fxid);
4112 }
4113 
4114 /*
4115  * Return FullTransactionId below which all transactions are not considered
4116  * running anymore.
4117  *
4118  * Note: This is less efficient than testing with
4119  * GlobalVisTestIsRemovableFullXid as it likely requires building an accurate
4120  * cutoff, even in the case all the XIDs compared with the cutoff are outside
4121  * [maybe_needed, definitely_needed).
4122  */
4125 {
4126  /* acquire accurate horizon if not already done */
4127  if (GlobalVisTestShouldUpdate(state))
4128  GlobalVisUpdate();
4129 
4130  return state->maybe_needed;
4131 }
4132 
4133 /* Convenience wrapper around GlobalVisTestNonRemovableFullHorizon */
4136 {
4137  FullTransactionId cutoff;
4138 
4139  cutoff = GlobalVisTestNonRemovableFullHorizon(state);
4140 
4141  return XidFromFullTransactionId(cutoff);
4142 }
4143 
4144 /*
4145  * Convenience wrapper around GlobalVisTestFor() and
4146  * GlobalVisTestIsRemovableFullXid(), see their comments.
4147  */
4148 bool
4150 {
4152 
4153  state = GlobalVisTestFor(rel);
4154 
4155  return GlobalVisTestIsRemovableFullXid(state, fxid);
4156 }
4157 
4158 /*
4159  * Convenience wrapper around GlobalVisTestFor() and
4160  * GlobalVisTestIsRemovableXid(), see their comments.
4161  */
4162 bool
4164 {
4166 
4167  state = GlobalVisTestFor(rel);
4168 
4169  return GlobalVisTestIsRemovableXid(state, xid);
4170 }
4171 
4172 /*
4173  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
4174  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
4175  *
4176  * Be very careful about when to use this function. It can only safely be used
4177  * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
4178  * rel. That e.g. can be guaranteed if the caller assures a snapshot is
4179  * held by the backend and xid is from a table (where vacuum/freezing ensures
4180  * the xid has to be within that range), or if xid is from the procarray,
4181  * whose entries prevent xid wraparound that way.
4182  */
4183 static inline FullTransactionId
4185 {
4186  TransactionId rel_xid = XidFromFullTransactionId(rel);
4187 
4189  Assert(TransactionIdIsValid(rel_xid));
4190 
4191  /* not guaranteed to find issues, but likely to catch mistakes */
4193 
4195  + (int32) (xid - rel_xid));
4196 }
4197 
4198 
4199 /* ----------------------------------------------
4200  * KnownAssignedTransactionIds sub-module
4201  * ----------------------------------------------
4202  */
4203 
4204 /*
4205  * In Hot Standby mode, we maintain a list of transactions that are (or were)
4206  * running on the primary at the current point in WAL. These XIDs must be
4207  * treated as running by standby transactions, even though they are not in
4208  * the standby server's PGPROC array.
4209  *
4210  * We record all XIDs that we know have been assigned. That includes all the
4211  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
4212  * been assigned. We can deduce the existence of unobserved XIDs because we
4213  * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
4214  * list expands as new XIDs are observed or inferred, and contracts when
4215  * transaction completion records arrive.
4216  *
4217  * During hot standby we do not fret too much about the distinction between
4218  * top-level XIDs and subtransaction XIDs. We store both together in the
4219  * KnownAssignedXids list. In backends, this is copied into snapshots in
4220  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
4221  * doesn't care about the distinction either. Subtransaction XIDs are
4222  * effectively treated as top-level XIDs and in the typical case pg_subtrans
4223  * links are *not* maintained (which does not affect visibility).
4224  *
4225  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
4226  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
4227  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
4228  * least every PGPROC_MAX_CACHED_SUBXIDS subXIDs. When we receive one of these
4229  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
4230  * and then remove them from KnownAssignedXids. This prevents overflow of
4231  * KnownAssignedXids and snapshots, at the cost that status checks for these
4232  * subXIDs will take a slower path through TransactionIdIsInProgress().
4233  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
4234  * though it should be complete for top-level XIDs; this is the same situation
4235  * that holds with respect to the PGPROC entries in normal running.
4236  *
4237  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
4238  * that, similarly to tracking overflow of a PGPROC's subxids array. We do
4239  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
4240  * As long as that is within the range of interesting XIDs, we have to assume
4241  * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
4242  * on the primary when the 65th subXID arrives, whereas on the standby it
4243  * occurs when the 64th subXID arrives - that is not an error.)
4244  *
4245  * Should a backend on primary somehow disappear before it can write an abort
4246  * record, then we just leave those XIDs in KnownAssignedXids. They actually
4247  * aborted but we think they were running; the distinction is irrelevant
4248  * because either way any changes done by the transaction are not visible to
4249  * backends in the standby. We prune KnownAssignedXids when
4250  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
4251  * array due to such dead XIDs.
4252  */
4253 
4254 /*
4255  * RecordKnownAssignedTransactionIds
4256  * Record the given XID in KnownAssignedXids, as well as any preceding
4257  * unobserved XIDs.
4258  *
4259  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
4260  * associated with a transaction. Must be called for each record after we
4261  * have executed StartupCLOG() et al, since we must ExtendSUBTRANS() etc.
4262  *
4263  * Called during recovery in analogy with and in place of GetNewTransactionId()
4264  */
4265 void
4267 {
4271 
4272  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
4273  xid, latestObservedXid);
4274 
4275  /*
4276  * When a newly observed xid arrives, it is frequently the case that it is
4277  * *not* the next xid in sequence. When this occurs, we must treat the
4278  * intervening xids as running also.
4279  */
4281  {
4282  TransactionId next_expected_xid;
4283 
4284  /*
4285  * Extend subtrans like we do in GetNewTransactionId() during normal
4286  * operation using individual extend steps. Note that we do not need
4287  * to extend clog since its extensions are WAL logged.
4288  *
4289  * This part has to be done regardless of standbyState since we
4290  * immediately start assigning subtransactions to their toplevel
4291  * transactions.
4292  */
4293  next_expected_xid = latestObservedXid;
4294  while (TransactionIdPrecedes(next_expected_xid, xid))
4295  {
4296  TransactionIdAdvance(next_expected_xid);
4297  ExtendSUBTRANS(next_expected_xid);
4298  }
4299  Assert(next_expected_xid == xid);
4300 
4301  /*
4302  * If the KnownAssignedXids machinery isn't up yet, there's nothing
4303  * more to do since we don't track assigned xids yet.
4304  */
4306  {
4307  latestObservedXid = xid;
4308  return;
4309  }
4310 
4311  /*
4312  * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
4313  */
4314  next_expected_xid = latestObservedXid;
4315  TransactionIdAdvance(next_expected_xid);
4316  KnownAssignedXidsAdd(next_expected_xid, xid, false);
4317 
4318  /*
4319  * Now we can advance latestObservedXid
4320  */
4321  latestObservedXid = xid;
4322 
4323  /* ShmemVariableCache->nextXid must be beyond any observed xid */
4325  next_expected_xid = latestObservedXid;
4326  TransactionIdAdvance(next_expected_xid);
4327  }
4328 }
4329 
4330 /*
4331  * ExpireTreeKnownAssignedTransactionIds
4332  * Remove the given XIDs from KnownAssignedXids.
4333  *
4334  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
4335  */
4336 void
4338  TransactionId *subxids, TransactionId max_xid)
4339 {
4341 
4342  /*
4343  * Uses same locking as transaction commit
4344  */
4345  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4346 
4347  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
4348 
4349  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
4351 
4352  /* ... and xactCompletionCount */
4354 
4355  LWLockRelease(ProcArrayLock);
4356 }
4357 
4358 /*
4359  * ExpireAllKnownAssignedTransactionIds
4360  * Remove all entries in KnownAssignedXids
4361  */
4362 void
4364 {
4365  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4367  LWLockRelease(ProcArrayLock);
4368 }
4369 
4370 /*
4371  * ExpireOldKnownAssignedTransactionIds
4372  * Remove KnownAssignedXids entries preceding the given XID
4373  */
4374 void
4376 {
4377  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4379  LWLockRelease(ProcArrayLock);
4380 }
4381 
4382 
4383 /*
4384  * Private module functions to manipulate KnownAssignedXids
4385  *
4386  * There are 5 main uses of the KnownAssignedXids data structure:
4387  *
4388  * * backends taking snapshots - all valid XIDs need to be copied out
4389  * * backends seeking to determine presence of a specific XID
4390  * * startup process adding new known-assigned XIDs
4391  * * startup process removing specific XIDs as transactions end
4392  * * startup process pruning array when special WAL records arrive
4393  *
4394  * This data structure is known to be a hot spot during Hot Standby, so we
4395  * go to some lengths to make these operations as efficient and as concurrent
4396  * as possible.
4397  *
4398  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
4399  * order, to be exact --- to allow binary search for specific XIDs. Note:
4400  * in general TransactionIdPrecedes would not provide a total order, but
4401  * we know that the entries present at any instant should not extend across
4402  * a large enough fraction of XID space to wrap around (the primary would
4403  * shut down for fear of XID wrap long before that happens). So it's OK to
4404  * use TransactionIdPrecedes as a binary-search comparator.
4405  *
4406  * It's cheap to maintain the sortedness during insertions, since new known
4407  * XIDs are always reported in XID order; we just append them at the right.
4408  *
4409  * To keep individual deletions cheap, we need to allow gaps in the array.
4410  * This is implemented by marking array elements as valid or invalid using
4411  * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
4412  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
4413  * XID entry itself. This preserves the property that the XID entries are
4414  * sorted, so we can do binary searches easily. Periodically we compress
4415  * out the unused entries; that's much cheaper than having to compress the
4416  * array immediately on every deletion.
4417  *
4418  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
4419  * are those with indexes tail <= i < head; items outside this subscript range
4420  * have unspecified contents. When head reaches the end of the array, we
4421  * force compression of unused entries rather than wrapping around, since
4422  * allowing wraparound would greatly complicate the search logic. We maintain
4423  * an explicit tail pointer so that pruning of old XIDs can be done without
4424  * immediately moving the array contents. In most cases only a small fraction
4425  * of the array contains valid entries at any instant.
4426  *
4427  * Although only the startup process can ever change the KnownAssignedXids
4428  * data structure, we still need interlocking so that standby backends will
4429  * not observe invalid intermediate states. The convention is that backends
4430  * must hold shared ProcArrayLock to examine the array. To remove XIDs from
4431  * the array, the startup process must hold ProcArrayLock exclusively, for
4432  * the usual transactional reasons (compare commit/abort of a transaction
4433  * during normal running). Compressing unused entries out of the array
4434  * likewise requires exclusive lock. To add XIDs to the array, we just insert
4435  * them into slots to the right of the head pointer and then advance the head
4436  * pointer. This wouldn't require any lock at all, except that on machines
4437  * with weak memory ordering we need to be careful that other processors
4438  * see the array element changes before they see the head pointer change.
4439  * We handle this by using a spinlock to protect reads and writes of the
4440  * head/tail pointers. (We could dispense with the spinlock if we were to
4441  * create suitable memory access barrier primitives and use those instead.)
4442  * The spinlock must be taken to read or write the head/tail pointers unless
4443  * the caller holds ProcArrayLock exclusively.
4444  *
4445  * Algorithmic analysis:
4446  *
4447  * If we have a maximum of M slots, with N XIDs currently spread across
4448  * S elements then we have N <= S <= M always.
4449  *
4450  * * Adding a new XID is O(1) and needs little locking (unless compression
4451  * must happen)
4452  * * Compressing the array is O(S) and requires exclusive lock
4453  * * Removing an XID is O(logS) and requires exclusive lock
4454  * * Taking a snapshot is O(S) and requires shared lock
4455  * * Checking for an XID is O(logS) and requires shared lock
4456  *
4457  * In comparison, using a hash table for KnownAssignedXids would mean that
4458  * taking snapshots would be O(M). If we can maintain S << M then the
4459  * sorted array technique will deliver significantly faster snapshots.
4460  * If we try to keep S too small then we will spend too much time compressing,
4461  * so there is an optimal point for any workload mix. We use a heuristic to
4462  * decide when to compress the array, though trimming also helps reduce
4463  * frequency of compressing. The heuristic requires us to track the number of
4464  * currently valid XIDs in the array.
4465  */
4466 
4467 
4468 /*
4469  * Compress KnownAssignedXids by shifting valid data down to the start of the
4470  * array, removing any gaps.
4471  *
4472  * A compression step is forced if "force" is true, otherwise we do it
4473  * only if a heuristic indicates it's a good time to do it.
4474  *
4475  * Caller must hold ProcArrayLock in exclusive mode.
4476  */
4477 static void
4479 {
4480  ProcArrayStruct *pArray = procArray;
4481  int head,
4482  tail;
4483  int compress_index;
4484  int i;
4485 
4486  /* no spinlock required since we hold ProcArrayLock exclusively */
4487  head = pArray->headKnownAssignedXids;
4488  tail = pArray->tailKnownAssignedXids;
4489 
4490  if (!force)
4491  {
4492  /*
4493  * If we can choose how much to compress, use a heuristic to avoid
4494  * compressing too often or not often enough.
4495  *
4496  * Heuristic is if we have a large enough current spread and less than
4497  * 50% of the elements are currently in use, then compress. This
4498  * should ensure we compress fairly infrequently. We could compress
4499  * less often though the virtual array would spread out more and
4500  * snapshots would become more expensive.
4501  */
4502  int nelements = head - tail;
4503 
4504  if (nelements < 4 * PROCARRAY_MAXPROCS ||
4505  nelements < 2 * pArray->numKnownAssignedXids)
4506  return;
4507  }
4508 
4509  /*
4510  * We compress the array by reading the valid values from tail to head,
4511  * re-aligning data to 0th element.
4512  */
4513  compress_index = 0;
4514  for (i = tail; i < head; i++)
4515  {
4516  if (KnownAssignedXidsValid[i])
4517  {
4518  KnownAssignedXids[compress_index] = KnownAssignedXids[i];
4519  KnownAssignedXidsValid[compress_index] = true;
4520  compress_index++;
4521  }
4522  }
4523 
4524  pArray->tailKnownAssignedXids = 0;
4525  pArray->headKnownAssignedXids = compress_index;
4526 }
4527 
4528 /*
4529  * Add xids into KnownAssignedXids at the head of the array.
4530  *
4531  * xids from from_xid to to_xid, inclusive, are added to the array.
4532  *
4533  * If exclusive_lock is true then caller already holds ProcArrayLock in
4534  * exclusive mode, so we need no extra locking here. Else caller holds no
4535  * lock, so we need to be sure we maintain sufficient interlocks against
4536  * concurrent readers. (Only the startup process ever calls this, so no need
4537  * to worry about concurrent writers.)
4538  */
4539 static void
4541  bool exclusive_lock)
4542 {
4543  ProcArrayStruct *pArray = procArray;
4544  TransactionId next_xid;
4545  int head,
4546  tail;
4547  int nxids;
4548  int i;
4549 
4550  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
4551 
4552  /*
4553  * Calculate how many array slots we'll need. Normally this is cheap; in
4554  * the unusual case where the XIDs cross the wrap point, we do it the hard
4555  * way.
4556  */
4557  if (to_xid >= from_xid)
4558  nxids = to_xid - from_xid + 1;
4559  else
4560  {
4561  nxids = 1;
4562  next_xid = from_xid;
4563  while (TransactionIdPrecedes(next_xid, to_xid))
4564  {
4565  nxids++;
4566  TransactionIdAdvance(next_xid);
4567  }
4568  }
4569 
4570  /*
4571  * Since only the startup process modifies the head/tail pointers, we
4572  * don't need a lock to read them here.
4573  */
4574  head = pArray->headKnownAssignedXids;
4575  tail = pArray->tailKnownAssignedXids;
4576 
4577  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
4578  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
4579 
4580  /*
4581  * Verify that insertions occur in TransactionId sequence. Note that even
4582  * if the last existing element is marked invalid, it must still have a
4583  * correctly sequenced XID value.
4584  */
4585  if (head > tail &&
4586  TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
4587  {
4589  elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
4590  }
4591 
4592  /*
4593  * If our xids won't fit in the remaining space, compress out free space
4594  */
4595  if (head + nxids > pArray->maxKnownAssignedXids)
4596  {
4597  /* must hold lock to compress */
4598  if (!exclusive_lock)
4599  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4600 
4602 
4603  head = pArray->headKnownAssignedXids;
4604  /* note: we no longer care about the tail pointer */
4605 
4606  if (!exclusive_lock)
4607  LWLockRelease(ProcArrayLock);
4608 
4609  /*
4610  * If it still won't fit then we're out of memory
4611  */
4612  if (head + nxids > pArray->maxKnownAssignedXids)
4613  elog(ERROR, "too many KnownAssignedXids");
4614  }
4615 
4616  /* Now we can insert the xids into the space starting at head */
4617  next_xid = from_xid;
4618  for (i = 0; i < nxids; i++)
4619  {
4620  KnownAssignedXids[head] = next_xid;
4621  KnownAssignedXidsValid[head] = true;
4622  TransactionIdAdvance(next_xid);
4623  head++;
4624  }
4625 
4626  /* Adjust count of number of valid entries */
4627  pArray->numKnownAssignedXids += nxids;
4628 
4629  /*
4630  * Now update the head pointer. We use a spinlock to protect this
4631  * pointer, not because the update is likely to be non-atomic, but to
4632  * ensure that other processors see the above array updates before they
4633  * see the head pointer change.
4634  *
4635  * If we're holding ProcArrayLock exclusively, there's no need to take the
4636  * spinlock.
4637  */
4638  if (exclusive_lock)
4639  pArray->headKnownAssignedXids = head;
4640  else
4641  {
4643  pArray->headKnownAssignedXids = head;
4645  }
4646 }
4647 
4648 /*
4649  * KnownAssignedXidsSearch
4650  *
4651  * Searches KnownAssignedXids for a specific xid and optionally removes it.
4652  * Returns true if it was found, false if not.
4653  *
4654  * Caller must hold ProcArrayLock in shared or exclusive mode.
4655  * Exclusive lock must be held for remove = true.
4656  */
4657 static bool
4659 {
4660  ProcArrayStruct *pArray = procArray;
4661  int first,
4662  last;
4663  int head;
4664  int tail;
4665  int result_index = -1;
4666 
4667  if (remove)
4668  {
4669  /* we hold ProcArrayLock exclusively, so no need for spinlock */
4670  tail = pArray->tailKnownAssignedXids;
4671  head = pArray->headKnownAssignedXids;
4672  }
4673  else
4674  {
4675  /* take spinlock to ensure we see up-to-date array contents */
4677  tail = pArray->tailKnownAssignedXids;
4678  head = pArray->headKnownAssignedXids;
4680  }
4681 
4682  /*
4683  * Standard binary search. Note we can ignore the KnownAssignedXidsValid
4684  * array here, since even invalid entries will contain sorted XIDs.
4685  */
4686  first = tail;
4687  last = head - 1;
4688  while (first <= last)
4689  {
4690  int mid_index;
4691  TransactionId mid_xid;
4692 
4693  mid_index = (first + last) / 2;
4694  mid_xid = KnownAssignedXids[mid_index];
4695 
4696  if (xid == mid_xid)
4697  {
4698  result_index = mid_index;
4699  break;
4700  }
4701  else if (TransactionIdPrecedes(xid, mid_xid))
4702  last = mid_index - 1;
4703  else
4704  first = mid_index + 1;
4705  }
4706 
4707  if (result_index < 0)
4708  return false; /* not in array */
4709 
4710  if (!KnownAssignedXidsValid[result_index])
4711  return false; /* in array, but invalid */
4712 
4713  if (remove)
4714  {
4715  KnownAssignedXidsValid[result_index] = false;
4716 
4717  pArray->numKnownAssignedXids--;
4718  Assert(pArray->numKnownAssignedXids >= 0);
4719 
4720  /*
4721  * If we're removing the tail element then advance tail pointer over
4722  * any invalid elements. This will speed future searches.
4723  */
4724  if (result_index == tail)
4725  {
4726  tail++;
4727  while (tail < head && !KnownAssignedXidsValid[tail])
4728  tail++;
4729  if (tail >= head)
4730  {
4731  /* Array is empty, so we can reset both pointers */
4732  pArray->headKnownAssignedXids = 0;
4733  pArray->tailKnownAssignedXids = 0;
4734  }
4735  else
4736  {
4737  pArray->tailKnownAssignedXids = tail;
4738  }
4739  }
4740  }
4741 
4742  return true;
4743 }
4744 
4745 /*
4746  * Is the specified XID present in KnownAssignedXids[]?
4747  *
4748  * Caller must hold ProcArrayLock in shared or exclusive mode.
4749  */
4750 static bool
4752 {
4754 
4755  return KnownAssignedXidsSearch(xid, false);
4756 }
4757 
4758 /*
4759  * Remove the specified XID from KnownAssignedXids[].
4760  *
4761  * Caller must hold ProcArrayLock in exclusive mode.
4762  */
4763 static void
4765 {
4767 
4768  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
4769 
4770  /*
4771  * Note: we cannot consider it an error to remove an XID that's not
4772  * present. We intentionally remove subxact IDs while processing
4773  * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
4774  * removed again when the top-level xact commits or aborts.
4775  *
4776  * It might be possible to track such XIDs to distinguish this case from
4777  * actual errors, but it would be complicated and probably not worth it.
4778  * So, just ignore the search result.
4779  */
4780  (void) KnownAssignedXidsSearch(xid, true);
4781 }
4782 
4783 /*
4784  * KnownAssignedXidsRemoveTree
4785  * Remove xid (if it's not InvalidTransactionId) and all the subxids.
4786  *
4787  * Caller must hold ProcArrayLock in exclusive mode.
4788  */
4789 static void
4791  TransactionId *subxids)
4792 {
4793  int i;
4794 
4795  if (TransactionIdIsValid(xid))
4797 
4798  for (i = 0; i < nsubxids; i++)
4799  KnownAssignedXidsRemove(subxids[i]);
4800 
4801  /* Opportunistically compress the array */
4803 }
4804 
4805 /*
4806  * Prune KnownAssignedXids up to, but *not* including removeXid; XIDs of
4807  * prepared transactions are kept. If removeXid is invalid, clear the whole table.
4808  *
4809  * Caller must hold ProcArrayLock in exclusive mode.
4810  */
4811 static void
4813 {
4814  ProcArrayStruct *pArray = procArray;
4815  int count = 0;
4816  int head,
4817  tail,
4818  i;
4819 
4820  if (!TransactionIdIsValid(removeXid))
4821  {
4822  elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
4823  pArray->numKnownAssignedXids = 0;
4824  pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
4825  return;
4826  }
4827 
4828  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
4829 
4830  /*
4831  * Mark entries invalid starting at the tail. Since array is sorted, we
4832  * can stop as soon as we reach an entry >= removeXid.
4833  */
4834  tail = pArray->tailKnownAssignedXids;
4835  head = pArray->headKnownAssignedXids;
4836 
4837  for (i = tail; i < head; i++)
4838  {
4839  if (KnownAssignedXidsValid[i])
4840  {
4841  TransactionId knownXid = KnownAssignedXids[i];
4842 
4843  if (TransactionIdFollowsOrEquals(knownXid, removeXid))
4844  break;
4845 
4846  if (!StandbyTransactionIdIsPrepared(knownXid))
4847  {
4848  KnownAssignedXidsValid[i] = false;
4849  count++;
4850  }
4851  }
4852  }
4853 
4854  pArray->numKnownAssignedXids -= count;
4855  Assert(pArray->numKnownAssignedXids >= 0);
4856 
4857  /*
4858  * Advance the tail pointer if we've marked the tail item invalid.
4859  */
4860  for (i = tail; i < head; i++)
4861  {
4862  if (KnownAssignedXidsValid[i])
4863  break;
4864  }
4865  if (i >= head)
4866  {
4867  /* Array is empty, so we can reset both pointers */
4868  pArray->headKnownAssignedXids = 0;
4869  pArray->tailKnownAssignedXids = 0;
4870  }
4871  else
4872  {
4873  pArray->tailKnownAssignedXids = i;
4874  }
4875 
4876  /* Opportunistically compress the array */
4878 }
4879 
4880 /*
4881  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
4882  * We filter out anything >= xmax.
4883  *
4884  * Returns the number of XIDs stored into xarray[]. Caller is responsible
4885  * that array is large enough.
4886  *
4887  * Caller must hold ProcArrayLock in (at least) shared mode.
4888  */
4889 static int
4891 {
4893 
4894  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
4895 }
4896 
4897 /*
4898  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
4899  * we reduce *xmin to the lowest xid value seen if not already lower.
4900  *
4901  * Caller must hold ProcArrayLock in (at least) shared mode.
4902  */
4903 static int
4905  TransactionId xmax)
4906 {
4907  int count = 0;
4908  int head,
4909  tail;
4910  int i;
4911 
4912  /*
4913  * Fetch head just once, since it may change while we loop. We can stop
4914  * once we reach the initially seen head, since we are certain that an xid
4915  * cannot enter and then leave the array while we hold ProcArrayLock. We
4916  * might miss newly-added xids, but they should be >= xmax so irrelevant
4917  * anyway.
4918  *
4919  * Must take spinlock to ensure we see up-to-date array contents.
4920  */
4922  tail = procArray->tailKnownAssignedXids;
4923  head = procArray->headKnownAssignedXids;
4925 
4926  for (i = tail; i < head; i++)
4927  {
4928  /* Skip any gaps in the array */
4929  if (KnownAssignedXidsValid[i])
4930  {
4931  TransactionId knownXid = KnownAssignedXids[i];
4932 
4933  /*
4934  * Update xmin if required. Only the first XID need be checked,
4935  * since the array is sorted.
4936  */
4937  if (count == 0 &&
4938  TransactionIdPrecedes(knownXid, *xmin))
4939  *xmin = knownXid;
4940 
4941  /*
4942  * Filter out anything >= xmax, again relying on sorted property
4943  * of array.
4944  */
4945  if (TransactionIdIsValid(xmax) &&
4946  TransactionIdFollowsOrEquals(knownXid, xmax))
4947  break;
4948 
4949  /* Add knownXid into output array */
4950  xarray[count++] = knownXid;
4951  }
4952  }
4953 
4954  return count;
4955 }
4956 
4957 /*
4958  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
4959  * if nothing there.
4960  */
4961 static TransactionId
4963 {
4964  int head,
4965  tail;
4966  int i;
4967 
4968  /*
4969  * Fetch head just once, since it may change while we loop.
4970  */
4972  tail = procArray->tailKnownAssignedXids;
4973  head = procArray->headKnownAssignedXids;
4975 
4976  for (i = tail; i < head; i++)
4977  {
4978  /* Skip any gaps in the array */
4979  if (KnownAssignedXidsValid[i])
4980  return KnownAssignedXids[i];
4981  }
4982 
4983  return InvalidTransactionId;
4984 }
4985 
4986 /*
4987  * Display KnownAssignedXids to provide debug trail
4988  *
4989  * Currently this is only called within startup process, so we need no
4990  * special locking.
4991  *
4992  * Note this is pretty expensive, and much of the expense will be incurred
4993  * even if the elog message will get discarded. It's not currently called
4994  * in any performance-critical places, however, so no need to optimize it.
4995  */
4996 static void
4998 {
4999  ProcArrayStruct *pArray = procArray;
5001  int head,
5002  tail,
5003  i;
5004  int nxids = 0;
5005 
5006  tail = pArray->tailKnownAssignedXids;
5007  head = pArray->headKnownAssignedXids;
5008 
5009  initStringInfo(&buf);
5010 
5011  for (i = tail; i < head; i++)
5012  {
5013  if (KnownAssignedXidsValid[i])
5014  {
5015  nxids++;
5016  appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
5017  }
5018  }
5019 
5020  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
5021  nxids,
5022  pArray->numKnownAssignedXids,
5023  pArray->tailKnownAssignedXids,
5024  pArray->headKnownAssignedXids,
5025  buf.data);
5026 
5027  pfree(buf.data);
5028 }
5029 
5030 /*
5031  * KnownAssignedXidsReset
5032  * Resets KnownAssignedXids to be empty
5033  */
5034 static void
5036 {
5037  ProcArrayStruct *pArray = procArray;
5038 
5039  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
5040 
5041  pArray->numKnownAssignedXids = 0;
5042  pArray->tailKnownAssignedXids = 0;
5043  pArray->headKnownAssignedXids = 0;
5044 
5045  LWLockRelease(ProcArrayLock);
5046 }
#define TransactionIdAdvance(dest)
Definition: transam.h:91
int slock_t
Definition: s_lock.h:934
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1000
#define NIL
Definition: pg_list.h:65
#define AmStartupProcess()
Definition: miscadmin.h:432
static TransactionId latestObservedXid
Definition: procarray.c:259
TransactionId oldest_considered_running
Definition: procarray.c:207
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:3164
TransactionId oldestRunningXid
Definition: standby.h:80
bool procArrayGroupMember
Definition: proc.h:214
uint64 snapXactCompletionCount
Definition: snapshot.h:216
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3326
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:96
#define PROCARRAY_MAXPROCS
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2850
static void ComputeXidHorizons(ComputeXidHorizonsResult *h)
Definition: procarray.c:1656
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1944
FullTransactionId latest_completed
Definition: procarray.c:187
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3434
#define DEBUG1
Definition: elog.h:25
#define likely(x)
Definition: c.h:260
TransactionId shared_oldest_nonremovable_raw
Definition: procarray.c:227
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:4997
#define GET_VXID_FROM_PGPROC(vxid, proc)
Definition: lock.h:79
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
BackendId backendId
Definition: proc.h:153
uint32 TransactionId
Definition: c.h:575
bool copied
Definition: snapshot.h:185
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:277
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
#define DEBUG3
Definition: elog.h:23
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:869
Oid GetUserId(void)
Definition: miscinit.c:476
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
XidCacheStatus * subxidStates
Definition: proc.h:325
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1926
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
static void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:681
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:3036
#define FullTransactionIdIsValid(x)
Definition: transam.h:55
PGPROC * MyProc
Definition: proc.c:68
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1320
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:365
int vacuum_defer_cleanup_age
Definition: standby.c:39
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:69
#define SpinLockInit(lock)
Definition: spin.h:60
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:584
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:4895
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3252
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:98
XLogRecPtr lsn
Definition: snapshot.h:209
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2946
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:739
unsigned char uint8
Definition: c.h:427
static FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
Definition: procarray.c:4184
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:338
#define xc_by_my_xact_inc()
Definition: procarray.c:313
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid)
Definition: procarray.c:935
Oid roleId
Definition: proc.h:155
TransactionId oldestXid
Definition: transam.h:215
int errcode(int sqlerrcode)
Definition: elog.c:704
TransactionId RecentXmin
Definition: snapmgr.c:113
uint64 xactCompletionCount
Definition: transam.h:241
slock_t known_assigned_xids_lck
Definition: procarray.c:84
bool superuser(void)
Definition: superuser.c:46
PROC_HDR * ProcGlobal
Definition: proc.c:80
bool suboverflowed
Definition: snapshot.h:182
TransactionId * xids
Definition: standby.h:83
#define kill(pid, sig)
Definition: win32_port.h:454
uint8 statusFlags
Definition: proc.h:187
bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid)
Definition: procarray.c:4055
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
FullTransactionId latestCompletedXid
Definition: transam.h:231
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:110
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:8148
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition: transam.h:81
#define TransactionIdRetreat(dest)
Definition: transam.h:141
LocalTransactionId localTransactionId
Definition: lock.h:65
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:620
#define xc_by_child_xid_inc()
Definition: procarray.c:316
bool TransactionIdIsKnownCompleted(TransactionId transactionId)
Definition: transam.c:238
#define DEBUG4
Definition: elog.h:22
#define fprintf
Definition: port.h:219
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:4337
#define MAXAUTOVACPIDS
FullTransactionId nextXid
Definition: transam.h:213
uint32 regd_count
Definition: snapshot.h:205
#define OidIsValid(objectId)
Definition: c.h:698
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3793
TransactionId catalog_oldest_nonremovable
Definition: procarray.c:233
XidCacheStatus subxidStatus
Definition: proc.h:208
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4363
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4097
signed int int32
Definition: c.h:417
int trace_recovery(int trace_level)
Definition: elog.c:3602
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:65
bool overflowed
Definition: proc.h:43
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TransactionId TransactionXmin
Definition: snapmgr.c:112
TransactionId latestCompletedXid
Definition: standby.h:81
FullTransactionId definitely_needed
Definition: procarray.c:172
Definition: type.h:89
#define malloc(a)
Definition: header.h:50
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1810
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:292
bool isBackgroundWorker
Definition: proc.h:160
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:73
#define xc_by_recent_xmin_inc()
Definition: procarray.c:311
#define xc_by_known_xact_inc()
Definition: procarray.c:312
static void GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
Definition: procarray.c:1992
bool MinimumActiveBackends(int min)
Definition: procarray.c:3381
static void KnownAssignedXidsRemovePreceding(TransactionId xid)
Definition: procarray.c:4812
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:3059
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3929
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:256
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3332
#define AssertTransactionIdInAllowableRange(xid)
Definition: transam.h:294
void pfree(void *pointer)
Definition: mcxt.c:1057
#define PROC_IN_VACUUM
Definition: proc.h:55
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
int CountDBConnections(Oid databaseid)
Definition: procarray.c:3464
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1913
#define ERROR
Definition: elog.h:45
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3816
#define FullTransactionIdIsNormal(x)
Definition: transam.h:58
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1635
bool delayChkpt
Definition: proc.h:185
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:853
#define lfirst_int(lc)
Definition: pg_list.h:170
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:308
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:525
#define FATAL
Definition: elog.h:54
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:11657
TransactionId slot_catalog_xmin
Definition: procarray.c:194
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3495
TransactionId xmin
Definition: proc.h:138
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define xc_by_main_xid_inc()
Definition: procarray.c:315
static bool GlobalVisTestShouldUpdate(GlobalVisState *state)
Definition: procarray.c:3980
static GlobalVisState GlobalVisSharedRels
Definition: procarray.c:273
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1378
static char * buf
Definition: pg_test_fsync.c:68
bool recoveryConflictPending
Definition: proc.h:167
#define xc_by_known_assigned_inc()
Definition: procarray.c:317
bool IsUnderPostmaster
Definition: globals.c:110
VariableCache ShmemVariableCache
Definition: varsup.c:34
int maxKnownAssignedXids
Definition: procarray.c:80
#define InvalidTransactionId
Definition: transam.h:31
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1369
TransactionId * xids
Definition: proc.h:319
static PGPROC * allProcs
Definition: procarray.c:252
Oid databaseId
Definition: proc.h:154
unsigned int uint32
Definition: c.h:429
TransactionId shared_oldest_nonremovable
Definition: procarray.c:216
TransactionId xmax
Definition: snapshot.h:158
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1512
TransactionId xmin
Definition: snapshot.h:157
static void KnownAssignedXidsReset(void)
Definition: procarray.c:5035
LOCK * waitLock
Definition: proc.h:179
int numKnownAssignedXids
Definition: procarray.c:81
static bool * KnownAssignedXidsValid
Definition: procarray.c:258
struct XidCache subxids
Definition: proc.h:210
TransactionId lastOverflowedXid
Definition: procarray.c:93
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
#define xc_by_latest_xid_inc()
Definition: procarray.c:314
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
#define INVALID_PGPROCNO
Definition: proc.h:80
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:168
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:4790
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:216
List * lappend_int(List *list, int datum)
Definition: list.c:339
Definition: proc.h:313
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
static ProcArrayStruct * procArray
Definition: procarray.c:250
#define WARNING
Definition: elog.h:40
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:70
signed char int8
Definition: c.h:415
static TransactionId ComputeXidHorizonsResultLastXmin
Definition: procarray.c:283
#define SpinLockRelease(lock)
Definition: spin.h:64
TransactionId replication_slot_xmin
Definition: procarray.c:96
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:3096
#define InvalidBackendId
Definition: backendid.h:23
static void MaintainLatestCompletedXid(TransactionId latestXid)
Definition: procarray.c:913
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:620
void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:1950
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:4890
bool GlobalVisIsRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4149
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Oid MyDatabaseId
Definition: globals.c:86
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:4962
#define InvalidOid
Definition: postgres_ext.h:36
CommandId curcid
Definition: snapshot.h:187
#define ereport(elevel,...)
Definition: elog.h:155
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4163
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1972
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:101
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:48
TransactionId temp_oldest_nonremovable
Definition: procarray.c:245
#define TOTAL_MAX_CACHED_SUBXIDS
static TransactionId TransactionIdOlder(TransactionId a, TransactionId b)
Definition: transam.h:327
#define Assert(condition)
Definition: c.h:792
static TransactionId * KnownAssignedXids
Definition: procarray.c:257
BackendId backendId
Definition: lock.h:64
Definition: regguts.h:298
#define pg_read_barrier()
Definition: atomics.h:158
#define U64FromFullTransactionId(x)
Definition: transam.h:49
void CreateSharedProcArray(void)
Definition: procarray.c:394
#define FullTransactionIdFollowsOrEquals(a, b)
Definition: transam.h:54
bool takenDuringRecovery
Definition: snapshot.h:184
size_t Size
Definition: c.h:528
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1488
struct ComputeXidHorizonsResult ComputeXidHorizonsResult
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2109
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1140
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1206
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
Definition: procarray.c:3999
static TransactionId TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
Definition: transam.h:315
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:4904
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:4540
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1937
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2547
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:147
#define xc_no_overflow_inc()
Definition: procarray.c:318
bool EnableHotStandby
Definition: xlog.c:98
FullTransactionId maybe_needed
Definition: procarray.c:175
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
static void KnownAssignedXidsCompress(bool force)
Definition: procarray.c:4478
uint8 count
Definition: proc.h:41
int CountUserBackends(Oid roleid)
Definition: procarray.c:3535
TransactionId xid
Definition: proc.h:133
static FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
Definition: transam.h:353
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:394
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:4751
static GlobalVisState GlobalVisTempRels
Definition: procarray.c:276
int pgprocno
Definition: proc.h:150
TransactionId nextXid
Definition: standby.h:79
bool TransactionIdIsActive(TransactionId xid)
Definition: procarray.c:1552
#define xc_slow_answer_inc()
Definition: procarray.c:319
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:344
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:969
uint32 xcnt
Definition: snapshot.h:169
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:915
struct ProcArrayStruct ProcArrayStruct
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:4658
FullTransactionId GlobalVisTestNonRemovableFullHorizon(GlobalVisState *state)
Definition: procarray.c:4124
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:4764
#define elog(elevel,...)
Definition: elog.h:228
#define InvalidLocalTransactionId
Definition: lock.h:68
TransactionId data_oldest_nonremovable
Definition: procarray.c:239
int i
int pgxactoff
Definition: proc.h:148
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4375
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2785
bool IsBackendPid(int pid)
Definition: procarray.c:3131
#define pg_write_barrier()
Definition: atomics.h:159
ProcSignalReason
Definition: procsignal.h:30
static bool GetSnapshotDataReuse(Snapshot snapshot)
Definition: procarray.c:2026
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3771
#define unlikely(x)
Definition: c.h:261
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:1983
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2610
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:1236
TimestampTz whenTaken
Definition: snapshot.h:208
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:3663
PGPROC * allProcs
Definition: proc.h:316
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:100
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:3585
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:761
uint8 * statusFlags
Definition: proc.h:331
#define qsort(a, b, c, d)
Definition: port.h:503
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void GlobalVisUpdate(void)
Definition: procarray.c:4038
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1854
PGSemaphore sem
Definition: proc.h:127
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2990
static GlobalVisState GlobalVisCatalogRels
Definition: procarray.c:274
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4135
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4266
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:74
static GlobalVisState GlobalVisDataRels
Definition: procarray.c:275
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2472
int tailKnownAssignedXids
Definition: procarray.c:82
TransactionId slot_xmin
Definition: procarray.c:193
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:266
Definition: proc.h:121
static void FullTransactionIdAdvance(FullTransactionId *dest)
Definition: transam.h:128
Definition: pg_list.h:50
int pid
Definition: proc.h:146
HotStandbyState standbyState
Definition: xlog.c:209
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:445
#define PROC_IS_AUTOVACUUM
Definition: proc.h:54
#define offsetof(type, field)
Definition: c.h:715
TransactionId procArrayGroupMemberXid
Definition: proc.h:222
Size ProcArrayShmemSize(void)
Definition: procarray.c:352
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1005
TransactionId * subxip
Definition: snapshot.h:180
uint32 active_count
Definition: snapshot.h:204
int headKnownAssignedXids
Definition: procarray.c:83
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:136
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
int32 subxcnt
Definition: snapshot.h:181
LocalTransactionId lxid
Definition: proc.h:143