PostgreSQL Source Code git master
procarray.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * procarray.c
4 * POSTGRES process array code.
5 *
6 *
7 * This module maintains arrays of PGPROC substructures, as well as associated
8 * arrays in ProcGlobal, for all active backends. Although there are several
9 * uses for this, the principal one is as a means of determining the set of
10 * currently running transactions.
11 *
12 * Because of various subtle race conditions it is critical that a backend
13 * hold the correct locks while setting or clearing its xid (in
14 * ProcGlobal->xids[]/MyProc->xid). See notes in
15 * src/backend/access/transam/README.
16 *
17 * The process arrays now also include structures representing prepared
18 * transactions. The xid and subxids fields of these are valid, as are the
19 * myProcLocks lists. They can be distinguished from regular backend PGPROCs
20 * at need by checking for pid == 0.
21 *
22 * During hot standby, we also keep a list of XIDs representing transactions
23 * that are known to be running on the primary (or more precisely, were running
24 * as of the current point in the WAL stream). This list is kept in the
25 * KnownAssignedXids array, and is updated by watching the sequence of
26 * arriving XIDs. This is necessary because if we leave those XIDs out of
27 * snapshots taken for standby queries, then they will appear to be already
28 * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
29 * array represents standby processes, which by definition are not running
30 * transactions that have XIDs.
31 *
32 * It is perhaps possible for a backend on the primary to terminate without
33 * writing an abort record for its transaction. While that shouldn't really
34 * happen, it would tie up KnownAssignedXids indefinitely, so we protect
35 * ourselves by pruning the array when a valid list of running XIDs arrives.
36 *
37 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
38 * Portions Copyright (c) 1994, Regents of the University of California
39 *
40 *
41 * IDENTIFICATION
42 * src/backend/storage/ipc/procarray.c
43 *
44 *-------------------------------------------------------------------------
45 */
46#include "postgres.h"
47
48#include <signal.h>
49
50#include "access/subtrans.h"
51#include "access/transam.h"
52#include "access/twophase.h"
53#include "access/xact.h"
54#include "access/xlogutils.h"
55#include "catalog/catalog.h"
56#include "catalog/pg_authid.h"
57#include "miscadmin.h"
58#include "pgstat.h"
59#include "postmaster/bgworker.h"
60#include "port/pg_lfind.h"
61#include "storage/proc.h"
62#include "storage/procarray.h"
63#include "utils/acl.h"
64#include "utils/builtins.h"
66#include "utils/lsyscache.h"
67#include "utils/rel.h"
68#include "utils/snapmgr.h"
69
/*
 * Read var as a uint32 through a volatile-qualified pointer. Taking the
 * address of var and dereferencing it as "volatile uint32 *" makes every
 * evaluation of the macro an actual memory read of var, rather than letting
 * the compiler reuse a previously loaded value. var must be an lvalue, since
 * its address is taken.
 */
70#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
71
72/* Our shared memory area */
73typedef struct ProcArrayStruct
74{
75 int numProcs; /* number of valid procs entries */
76 int maxProcs; /* allocated size of procs array */
77
78 /*
79 * Known assigned XIDs handling
80 */
81 int maxKnownAssignedXids; /* allocated size of array */
82 int numKnownAssignedXids; /* current # of valid entries */
83 int tailKnownAssignedXids; /* index of oldest valid element */
84 int headKnownAssignedXids; /* index of newest element, + 1 */
85
86 /*
87 * Highest subxid that has been removed from KnownAssignedXids array to
88 * prevent overflow; or InvalidTransactionId if none. We track this for
89 * similar reasons to tracking overflowing cached subxids in PGPROC
90 * entries. Must hold exclusive ProcArrayLock to change this, and shared
91 * lock to read it.
92 */
94
95 /* oldest xmin of any replication slot */
97 /* oldest catalog xmin of any replication slot */
99
100 /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
103
104/*
105 * State for the GlobalVisTest* family of functions. Those functions can
106 * e.g. be used to decide if a deleted row can be removed without violating
107 * MVCC semantics: If the deleted row's xmax is not considered to be running
108 * by anyone, the row can be removed.
109 *
110 * To avoid slowing down GetSnapshotData(), we don't calculate a precise
111 * cutoff XID while building a snapshot (looking at the frequently changing
112 * xmins scales badly). Instead we compute two boundaries while building the
113 * snapshot:
114 *
115 * 1) definitely_needed, indicating that rows deleted by XIDs >=
116 * definitely_needed are definitely still visible.
117 *
118 * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
119 * definitely be removed
120 *
121 * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
122 * && XID < definitely_needed), the boundaries can be recomputed (using
123 * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
124 * maintaining an accurate value all the time.
125 *
126 * As it is not cheap to compute accurate boundaries, we limit the number of
127 * times that happens in short succession. See GlobalVisTestShouldUpdate().
128 *
129 *
130 * There are four backend lifetime instances of this struct, optimized for
131 * different types of relations. As e.g. a normal user defined table in one
132 * database is inaccessible to backends connected to another database, a test
133 * specific to a relation can be more aggressive than a test for a shared
134 * relation. Currently we track four different states:
135 *
136 * 1) GlobalVisSharedRels, which only considers an XID's
137 * effects visible-to-everyone if neither snapshots in any database, nor a
138 * replication slot's xmin, nor a replication slot's catalog_xmin might
139 * still consider XID as running.
140 *
141 * 2) GlobalVisCatalogRels, which only considers an XID's
142 * effects visible-to-everyone if neither snapshots in the current
143 * database, nor a replication slot's xmin, nor a replication slot's
144 * catalog_xmin might still consider XID as running.
145 *
146 * I.e. the difference to GlobalVisSharedRels is that
147 * snapshots in other databases are ignored.
148 *
149 * 3) GlobalVisDataRels, which only considers an XID's
150 * effects visible-to-everyone if neither snapshots in the current
151 * database, nor a replication slot's xmin consider XID as running.
152 *
153 * I.e. the difference to GlobalVisCatalogRels is that
154 * replication slot's catalog_xmin is not taken into account.
155 *
156 * 4) GlobalVisTempRels, which only considers the current session, as temp
157 * tables are not visible to other sessions.
158 *
159 * GlobalVisTestFor(relation) returns the appropriate state
160 * for the relation.
161 *
162 * The boundaries are FullTransactionIds instead of TransactionIds to avoid
163 * wraparound dangers. Otherwise there would, e.g., be no procarray state to
164 * prevent maybe_needed from becoming old enough after the GetSnapshotData()
165 * call.
166 *
167 * The typedef is in the header.
168 */
170{
171 /* XIDs >= are considered running by some backend */
173
174 /* XIDs < are not considered to be running by any backend */
176};
177
178/*
179 * Result of ComputeXidHorizons().
180 */
182{
183 /*
184 * The value of TransamVariables->latestCompletedXid when
185 * ComputeXidHorizons() held ProcArrayLock.
186 */
188
189 /*
190 * The same for procArray->replication_slot_xmin and
191 * procArray->replication_slot_catalog_xmin.
192 */
195
196 /*
197 * Oldest xid that any backend might still consider running. This needs to
198 * include processes running VACUUM, in contrast to the normal visibility
199 * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
200 * determining visibility, but doesn't care about rows above its xmin
201 * being removed.
202 *
203 * This likely should only be needed to determine whether pg_subtrans can
204 * be truncated. It currently includes the effects of replication slots,
205 * for historical reasons. But that could likely be changed.
206 */
208
209 /*
210 * Oldest xid for which deleted tuples need to be retained in shared
211 * tables.
212 *
213 * This includes the effects of replication slots. If that's not desired,
214 * look at shared_oldest_nonremovable_raw.
215 */
217
218 /*
219 * Oldest xid that may be necessary to retain in shared tables. This is
220 * the same as shared_oldest_nonremovable, except that it is not affected by
221 * any replication slot's catalog_xmin.
222 *
223 * This is mainly useful to be able to send the catalog_xmin to upstream
224 * streaming replication servers via hot_standby_feedback, so they can
225 * apply the limit only when accessing catalog tables.
226 */
228
229 /*
230 * Oldest xid for which deleted tuples need to be retained in non-shared
231 * catalog tables.
232 */
234
235 /*
236 * Oldest xid for which deleted tuples need to be retained in normal user
237 * defined tables.
238 */
240
241 /*
242 * Oldest xid for which deleted tuples need to be retained in this
243 * session's temporary tables.
244 */
247
248/*
249 * Return value for GlobalVisHorizonKindForRel().
250 */
252{
258
259/*
260 * Reason codes for KnownAssignedXidsCompress().
261 */
263{
264 KAX_NO_SPACE, /* need to free up space at array end */
265 KAX_PRUNE, /* we just pruned old entries */
266 KAX_TRANSACTION_END, /* we just committed/removed some XIDs */
267 KAX_STARTUP_PROCESS_IDLE, /* startup process is about to sleep */
269
270
272
274
275/*
276 * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress()
277 */
279
280/*
281 * Bookkeeping for tracking emulated transactions in recovery
282 */
286
287/*
288 * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
289 * the highest xid that might still be running that we don't have in
290 * KnownAssignedXids.
291 */
293
294/*
295 * State for visibility checks on different types of relations. See struct
296 * GlobalVisState for details. As shared, catalog, normal and temporary
297 * relations can have different horizons, one such state exists for each.
298 */
303
304/*
305 * This backend's RecentXmin at the last time the accurate xmin horizon was
306 * recomputed, or InvalidTransactionId if it has not. Used to limit how many
307 * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
308 */
310
/*
 * Optional instrumentation, compiled in only when XIDCACHE_DEBUG is defined.
 * Both branches of the #ifdef define the same set of xc_*_inc() macros, so
 * code that calls them needs no conditional compilation of its own.
 */
311#ifdef XIDCACHE_DEBUG
312
313/* counters for XidCache measurement */
/* One file-local counter per xc_*_inc() macro below, all starting at zero. */
314static long xc_by_recent_xmin = 0;
315static long xc_by_known_xact = 0;
316static long xc_by_my_xact = 0;
317static long xc_by_latest_xid = 0;
318static long xc_by_main_xid = 0;
319static long xc_by_child_xid = 0;
320static long xc_by_known_assigned = 0;
321static long xc_no_overflow = 0;
322static long xc_slow_answer = 0;
323
/* Debug build: each macro post-increments the counter of the same name. */
324#define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
325#define xc_by_known_xact_inc() (xc_by_known_xact++)
326#define xc_by_my_xact_inc() (xc_by_my_xact++)
327#define xc_by_latest_xid_inc() (xc_by_latest_xid++)
328#define xc_by_main_xid_inc() (xc_by_main_xid++)
329#define xc_by_child_xid_inc() (xc_by_child_xid++)
330#define xc_by_known_assigned_inc() (xc_by_known_assigned++)
331#define xc_no_overflow_inc() (xc_no_overflow++)
332#define xc_slow_answer_inc() (xc_slow_answer++)
333
/*
 * Forward declaration of the routine used to dump these statistics; its
 * definition is not part of this block.
 */
334static void DisplayXidCache(void);
335#else /* !XIDCACHE_DEBUG */
336
/*
 * Non-debug build: every macro expands to the no-op expression ((void) 0),
 * which is still usable as a statement at each call site.
 */
337#define xc_by_recent_xmin_inc() ((void) 0)
338#define xc_by_known_xact_inc() ((void) 0)
339#define xc_by_my_xact_inc() ((void) 0)
340#define xc_by_latest_xid_inc() ((void) 0)
341#define xc_by_main_xid_inc() ((void) 0)
342#define xc_by_child_xid_inc() ((void) 0)
343#define xc_by_known_assigned_inc() ((void) 0)
344#define xc_no_overflow_inc() ((void) 0)
345#define xc_slow_answer_inc() ((void) 0)
346#endif /* XIDCACHE_DEBUG */
347
348/* Primitives for KnownAssignedXids array handling for standby */
349static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
350static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
351 bool exclusive_lock);
352static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
355static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
356 TransactionId *subxids);
358static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
360 TransactionId *xmin,
361 TransactionId xmax);
363static void KnownAssignedXidsDisplay(int trace_level);
364static void KnownAssignedXidsReset(void);
365static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
366static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
367static void MaintainLatestCompletedXid(TransactionId latestXid);
369
371 TransactionId xid);
373
374/*
375 * Report shared-memory space needed by ProcArrayShmemInit
376 */
377Size
379{
380 Size size;
381
382 /* Size of the ProcArray structure itself */
383#define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
384
385 size = offsetof(ProcArrayStruct, pgprocnos);
386 size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
387
388 /*
389 * During Hot Standby processing we have a data structure called
390 * KnownAssignedXids, created in shared memory. Local data structures are
391 * also created in various backends during GetSnapshotData(),
392 * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
393 * main structures created in those functions must be identically sized,
394 * since we may at times copy the whole of the data structures around. We
395 * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
396 *
397 * Ideally we'd only create this structure if we were actually doing hot
398 * standby in the current run, but we don't know that yet at the time
399 * shared memory is being set up.
400 */
401#define TOTAL_MAX_CACHED_SUBXIDS \
402 ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
403
405 {
406 size = add_size(size,
407 mul_size(sizeof(TransactionId),
409 size = add_size(size,
410 mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
411 }
412
413 return size;
414}
415
416/*
417 * Initialize the shared PGPROC array during postmaster startup.
418 */
419void
421{
422 bool found;
423
424 /* Create or attach to the ProcArray shared structure */
426 ShmemInitStruct("Proc Array",
427 add_size(offsetof(ProcArrayStruct, pgprocnos),
428 mul_size(sizeof(int),
430 &found);
431
432 if (!found)
433 {
434 /*
435 * We're the first - initialize.
436 */
437 procArray->numProcs = 0;
447 }
448
450
451 /* Create or attach to the KnownAssignedXids arrays too, if needed */
453 {
455 ShmemInitStruct("KnownAssignedXids",
456 mul_size(sizeof(TransactionId),
458 &found);
459 KnownAssignedXidsValid = (bool *)
460 ShmemInitStruct("KnownAssignedXidsValid",
461 mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
462 &found);
463 }
464}
465
466/*
467 * Add the specified PGPROC to the shared array.
468 */
469void
471{
472 int pgprocno = GetNumberFromPGProc(proc);
473 ProcArrayStruct *arrayP = procArray;
474 int index;
475 int movecount;
476
477 /* See ProcGlobal comment explaining why both locks are held */
478 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
479 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
480
481 if (arrayP->numProcs >= arrayP->maxProcs)
482 {
483 /*
484 * Oops, no room. (This really shouldn't happen, since there is a
485 * fixed supply of PGPROC structs too, and so we should have failed
486 * earlier.)
487 */
489 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
490 errmsg("sorry, too many clients already")));
491 }
492
493 /*
494 * Keep the procs array sorted by (PGPROC *) so that we can utilize
495 * locality of references much better. This is useful while traversing the
496 * ProcArray because there is an increased likelihood of finding the next
497 * PGPROC structure in the cache.
498 *
499 * Since the occurrence of adding/removing a proc is much lower than the
500 * access to the ProcArray itself, the overhead should be marginal
501 */
502 for (index = 0; index < arrayP->numProcs; index++)
503 {
504 int this_procno = arrayP->pgprocnos[index];
505
506 Assert(this_procno >= 0 && this_procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
507 Assert(allProcs[this_procno].pgxactoff == index);
508
509 /* If we have found our right position in the array, break */
510 if (this_procno > pgprocno)
511 break;
512 }
513
514 movecount = arrayP->numProcs - index;
515 memmove(&arrayP->pgprocnos[index + 1],
516 &arrayP->pgprocnos[index],
517 movecount * sizeof(*arrayP->pgprocnos));
518 memmove(&ProcGlobal->xids[index + 1],
520 movecount * sizeof(*ProcGlobal->xids));
521 memmove(&ProcGlobal->subxidStates[index + 1],
523 movecount * sizeof(*ProcGlobal->subxidStates));
524 memmove(&ProcGlobal->statusFlags[index + 1],
526 movecount * sizeof(*ProcGlobal->statusFlags));
527
528 arrayP->pgprocnos[index] = GetNumberFromPGProc(proc);
529 proc->pgxactoff = index;
530 ProcGlobal->xids[index] = proc->xid;
533
534 arrayP->numProcs++;
535
536 /* adjust pgxactoff for all following PGPROCs */
537 index++;
538 for (; index < arrayP->numProcs; index++)
539 {
540 int procno = arrayP->pgprocnos[index];
541
542 Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
543 Assert(allProcs[procno].pgxactoff == index - 1);
544
545 allProcs[procno].pgxactoff = index;
546 }
547
548 /*
549 * Release in reversed acquisition order, to reduce frequency of having to
550 * wait for XidGenLock while holding ProcArrayLock.
551 */
552 LWLockRelease(XidGenLock);
553 LWLockRelease(ProcArrayLock);
554}
555
556/*
557 * Remove the specified PGPROC from the shared array.
558 *
559 * When latestXid is a valid XID, we are removing a live 2PC gxact from the
560 * array, and thus causing it to appear as "not running" anymore. In this
561 * case we must advance latestCompletedXid. (This is essentially the same
562 * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
563 * the ProcArrayLock only once, and don't damage the content of the PGPROC;
564 * twophase.c depends on the latter.)
565 */
566void
568{
569 ProcArrayStruct *arrayP = procArray;
570 int myoff;
571 int movecount;
572
573#ifdef XIDCACHE_DEBUG
574 /* dump stats at backend shutdown, but not prepared-xact end */
575 if (proc->pid != 0)
576 DisplayXidCache();
577#endif
578
579 /* See ProcGlobal comment explaining why both locks are held */
580 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
581 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
582
583 myoff = proc->pgxactoff;
584
585 Assert(myoff >= 0 && myoff < arrayP->numProcs);
586 Assert(ProcGlobal->allProcs[arrayP->pgprocnos[myoff]].pgxactoff == myoff);
587
588 if (TransactionIdIsValid(latestXid))
589 {
591
592 /* Advance global latestCompletedXid while holding the lock */
594
595 /* Same with xactCompletionCount */
597
599 ProcGlobal->subxidStates[myoff].overflowed = false;
600 ProcGlobal->subxidStates[myoff].count = 0;
601 }
602 else
603 {
604 /* Shouldn't be trying to remove a live transaction here */
606 }
607
609 Assert(ProcGlobal->subxidStates[myoff].count == 0);
610 Assert(ProcGlobal->subxidStates[myoff].overflowed == false);
611
612 ProcGlobal->statusFlags[myoff] = 0;
613
614 /* Keep the PGPROC array sorted. See notes above */
615 movecount = arrayP->numProcs - myoff - 1;
616 memmove(&arrayP->pgprocnos[myoff],
617 &arrayP->pgprocnos[myoff + 1],
618 movecount * sizeof(*arrayP->pgprocnos));
619 memmove(&ProcGlobal->xids[myoff],
620 &ProcGlobal->xids[myoff + 1],
621 movecount * sizeof(*ProcGlobal->xids));
622 memmove(&ProcGlobal->subxidStates[myoff],
623 &ProcGlobal->subxidStates[myoff + 1],
624 movecount * sizeof(*ProcGlobal->subxidStates));
625 memmove(&ProcGlobal->statusFlags[myoff],
626 &ProcGlobal->statusFlags[myoff + 1],
627 movecount * sizeof(*ProcGlobal->statusFlags));
628
629 arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
630 arrayP->numProcs--;
631
632 /*
633 * Adjust pgxactoff of following procs for removed PGPROC (note that
634 * numProcs already has been decremented).
635 */
636 for (int index = myoff; index < arrayP->numProcs; index++)
637 {
638 int procno = arrayP->pgprocnos[index];
639
640 Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
641 Assert(allProcs[procno].pgxactoff - 1 == index);
642
643 allProcs[procno].pgxactoff = index;
644 }
645
646 /*
647 * Release in reversed acquisition order, to reduce frequency of having to
648 * wait for XidGenLock while holding ProcArrayLock.
649 */
650 LWLockRelease(XidGenLock);
651 LWLockRelease(ProcArrayLock);
652}
653
654
655/*
656 * ProcArrayEndTransaction -- mark a transaction as no longer running
657 *
658 * This is used interchangeably for commit and abort cases. The transaction
659 * commit/abort must already be reported to WAL and pg_xact.
660 *
661 * proc is currently always MyProc, but we pass it explicitly for flexibility.
662 * latestXid is the latest Xid among the transaction's main XID and
663 * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
664 * the caller to pass latestXid, instead of computing it from the PGPROC's
665 * contents, because the subxid information in the PGPROC might be
666 * incomplete.)
667 */
668void
670{
671 if (TransactionIdIsValid(latestXid))
672 {
673 /*
674 * We must lock ProcArrayLock while clearing our advertised XID, so
675 * that we do not exit the set of "running" transactions while someone
676 * else is taking a snapshot. See discussion in
677 * src/backend/access/transam/README.
678 */
680
681 /*
682 * If we can immediately acquire ProcArrayLock, we clear our own XID
683 * and release the lock. If not, use group XID clearing to improve
684 * efficiency.
685 */
686 if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
687 {
688 ProcArrayEndTransactionInternal(proc, latestXid);
689 LWLockRelease(ProcArrayLock);
690 }
691 else
692 ProcArrayGroupClearXid(proc, latestXid);
693 }
694 else
695 {
696 /*
697 * If we have no XID, we don't need to lock, since we won't affect
698 * anyone else's calculation of a snapshot. We might change their
699 * estimate of global xmin, but that's OK.
700 */
702 Assert(proc->subxidStatus.count == 0);
704
707
708 /* be sure this is cleared in abort */
709 proc->delayChkptFlags = 0;
710
711 proc->recoveryConflictPending = false;
712
713 /* must be cleared with xid/xmin: */
714 /* avoid unnecessarily dirtying shared cachelines */
716 {
717 Assert(!LWLockHeldByMe(ProcArrayLock));
718 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
720 proc->statusFlags &= ~PROC_VACUUM_STATE_MASK;
722 LWLockRelease(ProcArrayLock);
723 }
724 }
725}
726
727/*
728 * Mark a write transaction as no longer running.
729 *
730 * We don't do any locking here; caller must handle that.
731 */
732static inline void
734{
735 int pgxactoff = proc->pgxactoff;
736
737 /*
738 * Note: we need exclusive lock here because we're going to change other
739 * processes' PGPROC entries.
740 */
743 Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
744
749
750 /* be sure this is cleared in abort */
751 proc->delayChkptFlags = 0;
752
753 proc->recoveryConflictPending = false;
754
755 /* must be cleared with xid/xmin: */
756 /* avoid unnecessarily dirtying shared cachelines */
758 {
759 proc->statusFlags &= ~PROC_VACUUM_STATE_MASK;
761 }
762
763 /* Clear the subtransaction-XID cache too while holding the lock */
764 Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
766 if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
767 {
768 ProcGlobal->subxidStates[pgxactoff].count = 0;
769 ProcGlobal->subxidStates[pgxactoff].overflowed = false;
770 proc->subxidStatus.count = 0;
771 proc->subxidStatus.overflowed = false;
772 }
773
774 /* Also advance global latestCompletedXid while holding the lock */
776
777 /* Same with xactCompletionCount */
779}
780
781/*
782 * ProcArrayGroupClearXid -- group XID clearing
783 *
784 * When we cannot immediately acquire ProcArrayLock in exclusive mode at
785 * commit time, add ourselves to a list of processes that need their XIDs
786 * cleared. The first process to add itself to the list will acquire
787 * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
788 * on behalf of all group members. This avoids a great deal of contention
789 * around ProcArrayLock when many processes are trying to commit at once,
790 * since the lock need not be repeatedly handed off from one committing
791 * process to the next.
792 */
793static void
795{
796 int pgprocno = GetNumberFromPGProc(proc);
797 PROC_HDR *procglobal = ProcGlobal;
798 uint32 nextidx;
799 uint32 wakeidx;
800
801 /* We should definitely have an XID to clear. */
803
804 /* Add ourselves to the list of processes needing a group XID clear. */
805 proc->procArrayGroupMember = true;
806 proc->procArrayGroupMemberXid = latestXid;
807 nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
808 while (true)
809 {
811
813 &nextidx,
814 (uint32) pgprocno))
815 break;
816 }
817
818 /*
819 * If the list was not empty, the leader will clear our XID. It is
820 * impossible to have followers without a leader because the first process
821 * that has added itself to the list will always have nextidx as
822 * INVALID_PROC_NUMBER.
823 */
824 if (nextidx != INVALID_PROC_NUMBER)
825 {
826 int extraWaits = 0;
827
828 /* Sleep until the leader clears our XID. */
829 pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
830 for (;;)
831 {
832 /* acts as a read barrier */
833 PGSemaphoreLock(proc->sem);
834 if (!proc->procArrayGroupMember)
835 break;
836 extraWaits++;
837 }
839
841
842 /* Fix semaphore count for any absorbed wakeups */
843 while (extraWaits-- > 0)
844 PGSemaphoreUnlock(proc->sem);
845 return;
846 }
847
848 /* We are the leader. Acquire the lock on behalf of everyone. */
849 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
850
851 /*
852 * Now that we've got the lock, clear the list of processes waiting for
853 * group XID clearing, saving a pointer to the head of the list. Trying
854 * to pop elements one at a time could lead to an ABA problem.
855 */
856 nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
858
859 /* Remember head of list so we can perform wakeups after dropping lock. */
860 wakeidx = nextidx;
861
862 /* Walk the list and clear all XIDs. */
863 while (nextidx != INVALID_PROC_NUMBER)
864 {
865 PGPROC *nextproc = &allProcs[nextidx];
866
868
869 /* Move to next proc in list. */
870 nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
871 }
872
873 /* We're done with the lock now. */
874 LWLockRelease(ProcArrayLock);
875
876 /*
877 * Now that we've released the lock, go back and wake everybody up. We
878 * don't do this under the lock so as to keep lock hold times to a
879 * minimum. The system calls we need to perform to wake other processes
880 * up are probably much slower than the simple memory writes we did while
881 * holding the lock.
882 */
883 while (wakeidx != INVALID_PROC_NUMBER)
884 {
885 PGPROC *nextproc = &allProcs[wakeidx];
886
887 wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
889
890 /* ensure all previous writes are visible before follower continues. */
892
893 nextproc->procArrayGroupMember = false;
894
895 if (nextproc != MyProc)
896 PGSemaphoreUnlock(nextproc->sem);
897 }
898}
899
900/*
901 * ProcArrayClearTransaction -- clear the transaction fields
902 *
903 * This is used after successfully preparing a 2-phase transaction. We are
904 * not actually reporting the transaction's XID as no longer running --- it
905 * will still appear as running because the 2PC's gxact is in the ProcArray
906 * too. We just have to clear out our own PGPROC.
907 */
908void
910{
911 int pgxactoff;
912
913 /*
914 * Currently we need to lock ProcArrayLock exclusively here, as we
915 * increment xactCompletionCount below. We also need it at least in shared
916 * mode for pgproc->pgxactoff to stay the same below.
917 *
918 * We could however, as this action does not actually change anyone's view
919 * of the set of running XIDs (our entry is duplicate with the gxact that
920 * has already been inserted into the ProcArray), lower the lock level to
921 * shared if we were to make xactCompletionCount an atomic variable. But
922 * that doesn't seem worth it currently, as a 2PC commit is heavyweight
923 * enough for this not to be the bottleneck. If it ever becomes a
924 * bottleneck it may also be worth considering to combine this with the
925 * subsequent ProcArrayRemove()
926 */
927 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
928
929 pgxactoff = proc->pgxactoff;
930
933
936 proc->recoveryConflictPending = false;
937
939 Assert(!proc->delayChkptFlags);
940
941 /*
942 * Need to increment completion count even though transaction hasn't
943 * really committed yet. The reason for that is that GetSnapshotData()
944 * omits the xid of the current transaction, thus without the increment we
945 * otherwise could end up reusing the snapshot later. Which would be bad,
946 * because it might not count the prepared transaction as running.
947 */
949
950 /* Clear the subtransaction-XID cache too */
951 Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
953 if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
954 {
955 ProcGlobal->subxidStates[pgxactoff].count = 0;
956 ProcGlobal->subxidStates[pgxactoff].overflowed = false;
957 proc->subxidStatus.count = 0;
958 proc->subxidStatus.overflowed = false;
959 }
960
961 LWLockRelease(ProcArrayLock);
962}
963
964/*
965 * Update TransamVariables->latestCompletedXid to point to latestXid if
966 * currently older.
967 */
968static void
970{
972
973 Assert(FullTransactionIdIsValid(cur_latest));
975 Assert(LWLockHeldByMe(ProcArrayLock));
976
977 if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
978 {
980 FullXidRelativeTo(cur_latest, latestXid);
981 }
982
985}
986
987/*
988 * Same as MaintainLatestCompletedXid, except for use during WAL replay.
989 */
990static void
992{
995
997 Assert(LWLockHeldByMe(ProcArrayLock));
998
999 /*
1000 * Need a FullTransactionId to compare latestXid with. Can't rely on
1001 * latestCompletedXid to be initialized in recovery. But in recovery it's
1002 * safe to access nextXid without a lock for the startup process.
1003 */
1006
1007 if (!FullTransactionIdIsValid(cur_latest) ||
1008 TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
1009 {
1011 FullXidRelativeTo(rel, latestXid);
1012 }
1013
1015}
1016
1017/*
1018 * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
1019 *
1020 * Remember up to where the startup process initialized the CLOG and subtrans
1021 * so we can ensure it's initialized gaplessly up to the point where necessary
1022 * while in recovery.
1023 */
1024void
1026{
1028 Assert(TransactionIdIsNormal(initializedUptoXID));
1029
1030 /*
1031 * we set latestObservedXid to the xid SUBTRANS has been initialized up
1032 * to, so we can extend it from that point onwards in
1033 * RecordKnownAssignedTransactionIds, and when we get consistent in
1034 * ProcArrayApplyRecoveryInfo().
1035 */
1036 latestObservedXid = initializedUptoXID;
1038}
1039
1040/*
1041 * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
1042 *
1043 * Takes us through 3 states: Initialized, Pending and Ready.
1044 * Normal case is to go all the way to Ready straight away, though there
1045 * are atypical cases where we need to take it in steps.
1046 *
1047 * Use the data about running transactions on the primary to create the initial
1048 * state of KnownAssignedXids. We also use these records to regularly prune
1049 * KnownAssignedXids because we know it is possible that some transactions
1050 * with FATAL errors fail to write abort records, which could cause eventual
1051 * overflow.
1052 *
1053 * See comments for LogStandbySnapshot().
1054 */
1055void
1057{
1058 TransactionId *xids;
1059 TransactionId advanceNextXid;
1060 int nxids;
1061 int i;
1062
1067
1068 /*
1069 * Remove stale transactions, if any.
1070 */
1072
1073 /*
1074 * Adjust TransamVariables->nextXid before StandbyReleaseOldLocks(),
1075 * because we will need it up to date for accessing two-phase transactions
1076 * in StandbyReleaseOldLocks().
1077 */
1078 advanceNextXid = running->nextXid;
1079 TransactionIdRetreat(advanceNextXid);
1082
1083 /*
1084 * Remove stale locks, if any.
1085 */
1087
1088 /*
1089 * If our snapshot is already valid, nothing else to do...
1090 */
1092 return;
1093
1094 /*
1095 * If our initial RunningTransactionsData had an overflowed snapshot then
1096 * we knew we were missing some subxids from our snapshot. If we continue
1097 * to see overflowed snapshots then we might never be able to start up, so
1098 * we make another test to see if our snapshot is now valid. We know that
1099 * the missing subxids are equal to or earlier than nextXid. After we
1100 * initialise we continue to apply changes during recovery, so once the
1101 * oldestRunningXid is later than the nextXid from the initial snapshot we
1102 * know that we no longer have missing information and can mark the
1103 * snapshot as valid.
1104 */
1106 {
1107 /*
1108 * If the snapshot isn't overflowed or if it's empty we can reset our
1109 * pending state and use this snapshot instead.
1110 */
1111 if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0)
1112 {
1113 /*
1114 * If we have already collected known assigned xids, we need to
1115 * throw them away before we apply the recovery snapshot.
1116 */
1119 }
1120 else
1121 {
1123 running->oldestRunningXid))
1124 {
1126 elog(DEBUG1,
1127 "recovery snapshots are now enabled");
1128 }
1129 else
1130 elog(DEBUG1,
1131 "recovery snapshot waiting for non-overflowed snapshot or "
1132 "until oldest active xid on standby is at least %u (now %u)",
1134 running->oldestRunningXid);
1135 return;
1136 }
1137 }
1138
1140
1141 /*
1142 * NB: this can be reached at least twice, so make sure new code can deal
1143 * with that.
1144 */
1145
1146 /*
1147 * Nobody else is running yet, but take locks anyhow
1148 */
1149 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1150
1151 /*
1152 * KnownAssignedXids is sorted so we cannot just add the xids, we have to
1153 * sort them first.
1154 *
1155 * Some of the new xids are top-level xids and some are subtransactions.
1156 * We don't call SubTransSetParent because it doesn't matter yet. If we
1157 * aren't overflowed then all xids will fit in snapshot and so we don't
1158 * need subtrans. If we later overflow, an xid assignment record will add
1159 * xids to subtrans. If RunningTransactionsData is overflowed then we
1160 * don't have enough information to correctly update subtrans anyway.
1161 */
1162
1163 /*
1164 * Allocate a temporary array to avoid modifying the array passed as
1165 * argument.
1166 */
1167 xids = palloc_array(TransactionId, running->xcnt + running->subxcnt);
1168
1169 /*
1170 * Add to the temp array any xids which have not already completed.
1171 */
1172 nxids = 0;
1173 for (i = 0; i < running->xcnt + running->subxcnt; i++)
1174 {
1175 TransactionId xid = running->xids[i];
1176
1177 /*
1178 * The running-xacts snapshot can contain xids that were still visible
1179 * in the procarray when the snapshot was taken, but were already
1180 * WAL-logged as completed. They're not running anymore, so ignore
1181 * them.
1182 */
1184 continue;
1185
1186 xids[nxids++] = xid;
1187 }
1188
1189 if (nxids > 0)
1190 {
1192 {
1193 LWLockRelease(ProcArrayLock);
1194 elog(ERROR, "KnownAssignedXids is not empty");
1195 }
1196
1197 /*
1198 * Sort the array so that we can add them safely into
1199 * KnownAssignedXids.
1200 *
1201 * We have to sort them logically, because in KnownAssignedXidsAdd we
1202 * call TransactionIdFollowsOrEquals and so on. But we know these XIDs
1203 * come from RUNNING_XACTS, which means there are only normal XIDs
1204 * from the same epoch, so this is safe.
1205 */
1206 qsort(xids, nxids, sizeof(TransactionId), xidLogicalComparator);
1207
1208 /*
1209 * Add the sorted snapshot into KnownAssignedXids. The running-xacts
1210 * snapshot may include duplicated xids because of prepared
1211 * transactions, so ignore them.
1212 */
1213 for (i = 0; i < nxids; i++)
1214 {
1215 if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
1216 {
1217 elog(DEBUG1,
1218 "found duplicated transaction %u for KnownAssignedXids insertion",
1219 xids[i]);
1220 continue;
1221 }
1222 KnownAssignedXidsAdd(xids[i], xids[i], true);
1223 }
1224
1226 }
1227
1228 pfree(xids);
1229
1230 /*
1231 * latestObservedXid is at least set to the point where SUBTRANS was
1232 * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
1233 * RecordKnownAssignedTransactionIds() was called for. Initialize
1234 * subtrans from thereon, up to nextXid - 1.
1235 *
1236 * We need to duplicate parts of RecordKnownAssignedTransactionIds() here,
1237 * because we've just added xids to the known assigned xids machinery that
1238 * haven't gone through RecordKnownAssignedTransactionIds().
1239 */
1243 {
1246 }
1247 TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
1248
1249 /* ----------
1250 * Now we've got the running xids we need to set the global values that
1251 * are used to track snapshots as they evolve further.
1252 *
1253 * - latestCompletedXid which will be the xmax for snapshots
1254 * - lastOverflowedXid which shows whether snapshots overflow
1255 * - nextXid
1256 *
1257 * If the snapshot overflowed, then we still initialise with what we know,
1258 * but the recovery snapshot isn't fully valid yet because we know there
1259 * are some subxids missing. We don't know the specific subxids that are
1260 * missing, so conservatively assume the last one is latestObservedXid.
1261 * ----------
1262 */
1263 if (running->subxid_status == SUBXIDS_MISSING)
1264 {
1266
1269 }
1270 else
1271 {
1273
1275
1276 /*
1277 * If the 'xids' array didn't include all subtransactions, we have to
1278 * mark any snapshots taken as overflowed.
1279 */
1280 if (running->subxid_status == SUBXIDS_IN_SUBTRANS)
1282 else
1283 {
1286 }
1287 }
1288
1289 /*
1290 * If a transaction wrote a commit record in the gap between taking and
1291 * logging the snapshot then latestCompletedXid may already be higher than
1292 * the value from the snapshot, so check before we use the incoming value.
1293 * It also might not yet be set at all.
1294 */
1296
1297 /*
1298 * NB: No need to increment TransamVariables->xactCompletionCount here,
1299 * nobody can see it yet.
1300 */
1301
1302 LWLockRelease(ProcArrayLock);
1303
1306 elog(DEBUG1, "recovery snapshots are now enabled");
1307 else
1308 elog(DEBUG1,
1309 "recovery snapshot waiting for non-overflowed snapshot or "
1310 "until oldest active xid on standby is at least %u (now %u)",
1312 running->oldestRunningXid);
1313}
1314
1315/*
1316 * ProcArrayApplyXidAssignment
1317 * Process an XLOG_XACT_ASSIGNMENT WAL record
1318 */
1319void
1321 int nsubxids, TransactionId *subxids)
1322{
1323 TransactionId max_xid;
1324 int i;
1325
1327
1328 max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
1329
1330 /*
1331 * Mark all the subtransactions as observed.
1332 *
1333 * NOTE: This will fail if the subxid contains too many previously
1334 * unobserved xids to fit into known-assigned-xids. That shouldn't happen
1335 * as the code stands, because xid-assignment records should never contain
1336 * more than PGPROC_MAX_CACHED_SUBXIDS entries.
1337 */
1339
1340 /*
1341 * Notice that we update pg_subtrans with the top-level xid, rather than
1342 * the parent xid. This is a difference between normal processing and
1343 * recovery, yet is still correct in all cases. The reason is that
1344 * subtransaction commit is not marked in clog until commit processing, so
1345 * all aborted subtransactions have already been clearly marked in clog.
1346 * As a result we are able to refer directly to the top-level
1347 * transaction's state rather than skipping through all the intermediate
1348 * states in the subtransaction tree. This should be the first time we
1349 * have attempted to SubTransSetParent().
1350 */
1351 for (i = 0; i < nsubxids; i++)
1352 SubTransSetParent(subxids[i], topxid);
1353
1354 /* KnownAssignedXids isn't maintained yet, so we're done for now */
1356 return;
1357
1358 /*
1359 * Uses same locking as transaction commit
1360 */
1361 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1362
1363 /*
1364 * Remove subxids from known-assigned-xacts.
1365 */
1367
1368 /*
1369 * Advance lastOverflowedXid to be at least the last of these subxids.
1370 */
1372 procArray->lastOverflowedXid = max_xid;
1373
1374 LWLockRelease(ProcArrayLock);
1375}
1376
1377/*
1378 * TransactionIdIsInProgress -- is given transaction running in some backend
1379 *
1380 * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1381 * there are four possibilities for finding a running transaction:
1382 *
1383 * 1. The given Xid is a main transaction Id. We will find this out cheaply
1384 * by looking at ProcGlobal->xids.
1385 *
1386 * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1387 * We can find this out cheaply too.
1388 *
1389 * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1390 * if the Xid is running on the primary.
1391 *
1392 * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1393 * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
1394 * This is the slowest way, but sadly it has to be done always if the others
1395 * failed, unless we see that the cached subxact sets are complete (none have
1396 * overflowed).
1397 *
1398 * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
1399 * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1400 * This buys back some concurrency (and we can't retrieve the main Xids from
1401 * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
1402 */
1403bool
1405{
1406 static TransactionId *xids = NULL;
1407 static TransactionId *other_xids;
1408 XidCacheStatus *other_subxidstates;
1409 int nxids = 0;
1410 ProcArrayStruct *arrayP = procArray;
1411 TransactionId topxid;
1412 TransactionId latestCompletedXid;
1413 int mypgxactoff;
1414 int numProcs;
1415 int j;
1416
1417 /*
1418 * Don't bother checking a transaction older than RecentXmin; it could not
1419 * possibly still be running. (Note: in particular, this guarantees that
1420 * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1421 * running.)
1422 */
1424 {
1426 return false;
1427 }
1428
1429 /*
1430 * We may have just checked the status of this transaction, so if it is
1431 * already known to be completed, we can fall out without any access to
1432 * shared memory.
1433 */
1435 {
1437 return false;
1438 }
1439
1440 /*
1441 * Also, we can handle our own transaction (and subtransactions) without
1442 * any access to shared memory.
1443 */
1445 {
1447 return true;
1448 }
1449
1450 /*
1451 * If first time through, get workspace to remember main XIDs in. We
1452 * malloc it permanently to avoid repeated palloc/pfree overhead.
1453 */
1454 if (xids == NULL)
1455 {
1456 /*
1457 * In hot standby mode, reserve enough space to hold all xids in the
1458 * known-assigned list. If we later finish recovery, we no longer need
1459 * the bigger array, but we don't bother to shrink it.
1460 */
1461 int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1462
1463 xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1464 if (xids == NULL)
1465 ereport(ERROR,
1466 (errcode(ERRCODE_OUT_OF_MEMORY),
1467 errmsg("out of memory")));
1468 }
1469
1470 other_xids = ProcGlobal->xids;
1471 other_subxidstates = ProcGlobal->subxidStates;
1472
1473 LWLockAcquire(ProcArrayLock, LW_SHARED);
1474
1475 /*
1476 * Now that we have the lock, we can check latestCompletedXid; if the
1477 * target Xid is after that, it's surely still running.
1478 */
1479 latestCompletedXid =
1481 if (TransactionIdPrecedes(latestCompletedXid, xid))
1482 {
1483 LWLockRelease(ProcArrayLock);
1485 return true;
1486 }
1487
1488 /* No shortcuts, gotta grovel through the array */
1489 mypgxactoff = MyProc->pgxactoff;
1490 numProcs = arrayP->numProcs;
1491 for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
1492 {
1493 int pgprocno;
1494 PGPROC *proc;
1495 TransactionId pxid;
1496 int pxids;
1497
1498 /* Ignore ourselves --- dealt with it above */
1499 if (pgxactoff == mypgxactoff)
1500 continue;
1501
1502 /* Fetch xid just once - see GetNewTransactionId */
1503 pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
1504
1505 if (!TransactionIdIsValid(pxid))
1506 continue;
1507
1508 /*
1509 * Step 1: check the main Xid
1510 */
1511 if (TransactionIdEquals(pxid, xid))
1512 {
1513 LWLockRelease(ProcArrayLock);
1515 return true;
1516 }
1517
1518 /*
1519 * We can ignore main Xids that are younger than the target Xid, since
1520 * the target could not possibly be their child.
1521 */
1522 if (TransactionIdPrecedes(xid, pxid))
1523 continue;
1524
1525 /*
1526 * Step 2: check the cached child-Xids arrays
1527 */
1528 pxids = other_subxidstates[pgxactoff].count;
1529 pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
1530 pgprocno = arrayP->pgprocnos[pgxactoff];
1531 proc = &allProcs[pgprocno];
1532 for (j = pxids - 1; j >= 0; j--)
1533 {
1534 /* Fetch xid just once - see GetNewTransactionId */
1536
1537 if (TransactionIdEquals(cxid, xid))
1538 {
1539 LWLockRelease(ProcArrayLock);
1541 return true;
1542 }
1543 }
1544
1545 /*
1546 * Save the main Xid for step 4. We only need to remember main Xids
1547 * that have uncached children. (Note: there is no race condition
1548 * here because the overflowed flag cannot be cleared, only set, while
1549 * we hold ProcArrayLock. So we can't miss an Xid that we need to
1550 * worry about.)
1551 */
1552 if (other_subxidstates[pgxactoff].overflowed)
1553 xids[nxids++] = pxid;
1554 }
1555
1556 /*
1557 * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1558 * in the list must be treated as running.
1559 */
1560 if (RecoveryInProgress())
1561 {
1562 /* none of the PGPROC entries should have XIDs in hot standby mode */
1563 Assert(nxids == 0);
1564
1565 if (KnownAssignedXidExists(xid))
1566 {
1567 LWLockRelease(ProcArrayLock);
1569 return true;
1570 }
1571
1572 /*
1573 * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1574 * too. Fetch all xids from KnownAssignedXids that are lower than
1575 * xid, since if xid is a subtransaction its parent will always have a
1576 * lower value. Note we will collect both main and subXIDs here, but
1577 * there's no help for it.
1578 */
1580 nxids = KnownAssignedXidsGet(xids, xid);
1581 }
1582
1583 LWLockRelease(ProcArrayLock);
1584
1585 /*
1586 * If none of the relevant caches overflowed, we know the Xid is not
1587 * running without even looking at pg_subtrans.
1588 */
1589 if (nxids == 0)
1590 {
1593 return false;
1594 }
1595
1596 /*
1597 * Step 4: have to check pg_subtrans.
1598 *
1599 * At this point, we know it's either a subtransaction of one of the Xids
1600 * in xids[], or it's not running. If it's an already-failed
1601 * subtransaction, we want to say "not running" even though its parent may
1602 * still be running. So first, check pg_xact to see if it's been aborted.
1603 */
1605
1606 if (TransactionIdDidAbort(xid))
1607 {
1609 return false;
1610 }
1611
1612 /*
1613 * It isn't aborted, so check whether the transaction tree it belongs to
1614 * is still running (or, more precisely, whether it was running when we
1615 * held ProcArrayLock).
1616 */
1617 topxid = SubTransGetTopmostTransaction(xid);
1619 if (!TransactionIdEquals(topxid, xid) &&
1620 pg_lfind32(topxid, xids, nxids))
1621 return true;
1622
1624 return false;
1625}
1626
1627
1628/*
1629 * Determine XID horizons.
1630 *
1631 * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
1632 * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
1633 * well as "internally" by GlobalVisUpdate() (see comment above struct
1634 * GlobalVisState).
1635 *
1636 * See the definition of ComputeXidHorizonsResult for the various computed
1637 * horizons.
1638 *
1639 * For VACUUM, separate horizons (used to decide which deleted tuples must
1640 * be preserved) are computed for shared and non-shared tables. For shared
1641 * relations backends in all databases must be considered, but for non-shared
1642 * relations that's not required, since only backends in my own database could
1643 * ever see the tuples in them. Also, we can ignore concurrently running lazy
1644 * VACUUMs because (a) they must be working on other tables, and (b) they
1645 * don't need to do snapshot-based lookups.
1646 *
1647 * This also computes a horizon used to truncate pg_subtrans. For that
1648 * backends in all databases have to be considered, and concurrently running
1649 * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
1650 * accesses.
1651 *
1652 * Note: we include all currently running xids in the set of considered xids.
1653 * This ensures that if a just-started xact has not yet set its snapshot,
1654 * when it does set the snapshot it cannot set xmin less than what we compute.
1655 * See notes in src/backend/access/transam/README.
1656 *
1657 * Note: despite the above, it's possible for the calculated values to move
1658 * backwards on repeated calls. The calculated values are conservative, so
1659 * that anything older is definitely not considered as running by anyone
1660 * anymore, but the exact values calculated depend on a number of things. For
1661 * example, if there are no transactions running in the current database, the
1662 * horizon for normal tables will be latestCompletedXid. If a transaction
1663 * begins after that, its xmin will include in-progress transactions in other
1664 * databases that started earlier, so another call will return a lower value.
1665 * Nonetheless it is safe to vacuum a table in the current database with the
1666 * first result. There are also replication-related effects: a walsender
1667 * process can set its xmin based on transactions that are no longer running
1668 * on the primary but are still being replayed on the standby, thus possibly
1669 * making the values go backwards. In this case there is a possibility that
1670 * we lose data that the standby would like to have, but unless the standby
1671 * uses a replication slot to make its xmin persistent there is little we can
1672 * do about that --- data is only protected if the walsender runs continuously
1673 * while queries are executed on the standby. (The Hot Standby code deals
1674 * with such cases by failing standby queries that needed to access
1675 * already-removed data, so there's no integrity bug.)
1676 *
1677 * Note: the approximate horizons (see definition of GlobalVisState) are
1678 * updated by the computations done here. That's currently required for
1679 * correctness and a small optimization. Without doing so it's possible that
1680 * heap vacuum's call to heap_page_prune_and_freeze() uses a more conservative
1681 * horizon than later when deciding which tuples can be removed - which the
1682 * code doesn't expect (breaking HOT).
1683 */
1684static void
1686{
1687 ProcArrayStruct *arrayP = procArray;
1688 TransactionId kaxmin;
1689 bool in_recovery = RecoveryInProgress();
1690 TransactionId *other_xids = ProcGlobal->xids;
1691
1692 /* inferred after ProcArrayLock is released */
1694
1695 LWLockAcquire(ProcArrayLock, LW_SHARED);
1696
1698
1699 /*
1700 * We initialize the MIN() calculation with latestCompletedXid + 1. This
1701 * is a lower bound for the XIDs that might appear in the ProcArray later,
1702 * and so protects us against overestimating the result due to future
1703 * additions.
1704 */
1705 {
1706 TransactionId initial;
1707
1709 Assert(TransactionIdIsValid(initial));
1710 TransactionIdAdvance(initial);
1711
1712 h->oldest_considered_running = initial;
1713 h->shared_oldest_nonremovable = initial;
1714 h->data_oldest_nonremovable = initial;
1715
1716 /*
1717 * Only modifications made by this backend affect the horizon for
1718 * temporary relations. Instead of a check in each iteration of the
1719 * loop over all PGPROCs it is cheaper to just initialize to the
1720 * current top-level xid, if any.
1721 *
1722 * Without an assigned xid we could use a horizon as aggressive as
1723 * GetNewTransactionId(), but we can get away with the much cheaper
1724 * latestCompletedXid + 1: if this backend has no xid there can't, by
1725 * definition, be any newer changes in the temp table than
1726 * latestCompletedXid.
1727 */
1730 else
1731 h->temp_oldest_nonremovable = initial;
1732 }
1733
1734 /*
1735 * Fetch slot horizons while ProcArrayLock is held - the
1736 * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
1737 * the lock.
1738 */
1741
1742 for (int index = 0; index < arrayP->numProcs; index++)
1743 {
1744 int pgprocno = arrayP->pgprocnos[index];
1745 PGPROC *proc = &allProcs[pgprocno];
1746 int8 statusFlags = ProcGlobal->statusFlags[index];
1747 TransactionId xid;
1748 TransactionId xmin;
1749
1750 /* Fetch xid just once - see GetNewTransactionId */
1751 xid = UINT32_ACCESS_ONCE(other_xids[index]);
1752 xmin = UINT32_ACCESS_ONCE(proc->xmin);
1753
1754 /*
1755 * Consider both the transaction's Xmin, and its Xid.
1756 *
1757 * We must check both because a transaction might have an Xmin but not
1758 * (yet) an Xid; conversely, if it has an Xid, that could determine
1759 * some not-yet-set Xmin.
1760 */
1761 xmin = TransactionIdOlder(xmin, xid);
1762
1763 /* if neither is set, this proc doesn't influence the horizon */
1764 if (!TransactionIdIsValid(xmin))
1765 continue;
1766
1767 /*
1768 * Don't ignore any procs when determining which transactions might be
1769 * considered running. While slots should ensure logical decoding
1770 * backends are protected even without this check, it can't hurt to
1771 * include them here as well.
1772 */
1775
1776 /*
1777 * Skip over backends either vacuuming (which is ok with rows being
1778 * removed, as long as pg_subtrans is not truncated) or doing logical
1779 * decoding (which manages xmin separately, check below).
1780 */
1781 if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
1782 continue;
1783
1784 /* shared tables need to take backends in all databases into account */
1787
1788 /*
1789 * Normally sessions in other databases are ignored for anything but
1790 * the shared horizon.
1791 *
1792 * However, include them when MyDatabaseId is not (yet) set. A
1793 * backend in the process of starting up must not compute a "too
1794 * aggressive" horizon, otherwise we could end up using it to prune
1795 * still-needed data away. If the current backend never connects to a
1796 * database this is harmless, because data_oldest_nonremovable will
1797 * never be utilized.
1798 *
1799 * Also, sessions marked with PROC_AFFECTS_ALL_HORIZONS should always
1800 * be included. (This flag is used for hot standby feedback, which
1801 * can't be tied to a specific database.)
1802 *
1803 * Also, while in recovery we cannot compute an accurate per-database
1804 * horizon, as all xids are managed via the KnownAssignedXids
1805 * machinery.
1806 */
1807 if (proc->databaseId == MyDatabaseId ||
1809 (statusFlags & PROC_AFFECTS_ALL_HORIZONS) ||
1810 in_recovery)
1811 {
1814 }
1815 }
1816
1817 /*
1818 * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
1819 * after lock is released.
1820 */
1821 if (in_recovery)
1823
1824 /*
1825 * No other information from shared state is needed, release the lock
1826 * immediately. The rest of the computations can be done without a lock.
1827 */
1828 LWLockRelease(ProcArrayLock);
1829
1830 if (in_recovery)
1831 {
1838 /* temp relations cannot be accessed in recovery */
1839 }
1840
1845
1846 /*
1847 * Check whether there are replication slots requiring an older xmin.
1848 */
1853
1854 /*
1855 * The only difference between catalog / data horizons is that the slot's
1856 * catalog xmin is applied to the catalog one (so catalogs can be accessed
1857 * for logical decoding). Initialize with data horizon, and then back up
1858 * further if necessary. Have to back up the shared horizon as well, since
1859 * that also can contain catalogs.
1860 */
1869
1870 /*
1871 * It's possible that slots backed up the horizons further than
1872 * oldest_considered_running. Fix.
1873 */
1883
1884 /*
1885 * shared horizons have to be at least as old as the oldest visible in
1886 * current db
1887 */
1892
1893 /*
1894 * Horizons need to ensure that pg_subtrans access is still possible for
1895 * the relevant backends.
1896 */
1907 h->slot_xmin));
1910 h->slot_catalog_xmin));
1911
1912 /* update approximate horizons with the computed horizons */
1914}
1915
1916/*
1917 * Determine what kind of visibility horizon needs to be used for a
1918 * relation. If rel is NULL, the most conservative horizon is used.
1919 */
1920static inline GlobalVisHorizonKind
1922{
1923 /*
1924 * Other relkinds currently don't contain xids, nor always the necessary
1925 * logical decoding markers.
1926 */
1927 Assert(!rel ||
1928 rel->rd_rel->relkind == RELKIND_RELATION ||
1929 rel->rd_rel->relkind == RELKIND_MATVIEW ||
1930 rel->rd_rel->relkind == RELKIND_TOASTVALUE);
1931
1932 if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
1933 return VISHORIZON_SHARED;
1934 else if (IsCatalogRelation(rel) ||
1936 return VISHORIZON_CATALOG;
1937 else if (!RELATION_IS_LOCAL(rel))
1938 return VISHORIZON_DATA;
1939 else
1940 return VISHORIZON_TEMP;
1941}
1942
1943/*
1944 * Return the oldest XID for which deleted tuples must be preserved in the
1945 * passed table.
1946 *
1947 * If rel is not NULL the horizon may be considerably more recent than
1948 * otherwise (i.e. more tuples will be removable). In the NULL case a horizon
1949 * that is correct (but not optimal) for all relations will be returned.
1950 *
1951 * This is used by VACUUM to decide which deleted tuples must be preserved in
1952 * the passed in table.
1953 */
1956{
1957 ComputeXidHorizonsResult horizons;
1958
1959 ComputeXidHorizons(&horizons);
1960
1961 switch (GlobalVisHorizonKindForRel(rel))
1962 {
1963 case VISHORIZON_SHARED:
1964 return horizons.shared_oldest_nonremovable;
1965 case VISHORIZON_CATALOG:
1966 return horizons.catalog_oldest_nonremovable;
1967 case VISHORIZON_DATA:
1968 return horizons.data_oldest_nonremovable;
1969 case VISHORIZON_TEMP:
1970 return horizons.temp_oldest_nonremovable;
1971 }
1972
1973 /* just to prevent compiler warnings */
1974 return InvalidTransactionId;
1975}
1976
1977/*
1978 * Return the oldest transaction id any currently running backend might still
1979 * consider running. This should not be used for visibility / pruning
1980 * determinations (see GetOldestNonRemovableTransactionId()), but for
1981 * decisions like up to where pg_subtrans can be truncated.
1982 */
1985{
1986 ComputeXidHorizonsResult horizons;
1987
1988 ComputeXidHorizons(&horizons);
1989
1990 return horizons.oldest_considered_running;
1991}
1992
1993/*
1994 * Return the visibility horizons for a hot standby feedback message.
1995 */
1996void
1998{
1999 ComputeXidHorizonsResult horizons;
2000
2001 ComputeXidHorizons(&horizons);
2002
2003 /*
2004 * Don't want to use shared_oldest_nonremovable here, as that contains the
2005 * effect of replication slot's catalog_xmin. We want to send a separate
2006 * feedback for the catalog horizon, so the primary can remove data table
2007 * contents more aggressively.
2008 */
2009 *xmin = horizons.shared_oldest_nonremovable_raw;
2010 *catalog_xmin = horizons.slot_catalog_xmin;
2011}
2012
2013/*
2014 * GetMaxSnapshotXidCount -- get max size for snapshot XID array
2015 *
2016 * We have to export this for use by snapmgr.c.
2017 */
2018int
2020{
2021 return procArray->maxProcs;
2022}
2023
2024/*
2025 * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
2026 *
2027 * We have to export this for use by snapmgr.c.
2028 */
2029int
2031{
2033}
2034
2035/*
2036 * Helper function for GetSnapshotData() that checks if the bulk of the
2037 * visibility information in the snapshot is still valid. If so, it updates
2038 * the fields that need to change and returns true. Otherwise it returns
2039 * false.
2040 *
2041 * This very likely can be evolved to not need ProcArrayLock held (at very
2042 * least in the case we already hold a snapshot), but that's for another day.
2043 */
2044static bool
2046{
2047 uint64 curXactCompletionCount;
2048
2049 Assert(LWLockHeldByMe(ProcArrayLock));
2050
2051 if (unlikely(snapshot->snapXactCompletionCount == 0))
2052 return false;
2053
2054 curXactCompletionCount = TransamVariables->xactCompletionCount;
2055 if (curXactCompletionCount != snapshot->snapXactCompletionCount)
2056 return false;
2057
2058 /*
2059 * If the current xactCompletionCount is still the same as it was at the
2060 * time the snapshot was built, we can be sure that rebuilding the
2061 * contents of the snapshot the hard way would result in the same snapshot
2062 * contents:
2063 *
2064 * As explained in transam/README, the set of xids considered running by
2065 * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
2066 * contents only depend on transactions with xids and xactCompletionCount
2067 * is incremented whenever a transaction with an xid finishes (while
2068 * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
2069 * ensures we would detect if the snapshot would have changed.
2070 *
2071 * As the snapshot contents are the same as it was before, it is safe to
2072 * re-enter the snapshot's xmin into the PGPROC array. None of the rows
2073 * visible under the snapshot could already have been removed (that'd
2074 * require the set of running transactions to change) and it fulfills the
2075 * requirement that concurrent GetSnapshotData() calls yield the same
2076 * xmin.
2077 */
2079 MyProc->xmin = TransactionXmin = snapshot->xmin;
2080
2081 RecentXmin = snapshot->xmin;
2083
2084 snapshot->curcid = GetCurrentCommandId(false);
2085 snapshot->active_count = 0;
2086 snapshot->regd_count = 0;
2087 snapshot->copied = false;
2088
2089 return true;
2090}
2091
2092/*
2093 * GetSnapshotData -- returns information about running transactions.
2094 *
2095 * The returned snapshot includes xmin (lowest still-running xact ID),
2096 * xmax (highest completed xact ID + 1), and a list of running xact IDs
2097 * in the range xmin <= xid < xmax. It is used as follows:
2098 * All xact IDs < xmin are considered finished.
2099 * All xact IDs >= xmax are considered still running.
2100 * For an xact ID xmin <= xid < xmax, consult list to see whether
2101 * it is considered running or not.
2102 * This ensures that the set of transactions seen as "running" by the
2103 * current xact will not change after it takes the snapshot.
2104 *
2105 * All running top-level XIDs are included in the snapshot, except for lazy
2106 * VACUUM processes. We also try to include running subtransaction XIDs,
2107 * but since PGPROC has only a limited cache area for subxact XIDs, full
2108 * information may not be available. If we find any overflowed subxid arrays,
2109 * we have to mark the snapshot's subxid data as overflowed, and extra work
2110 * *may* need to be done to determine what's running (see XidInMVCCSnapshot()).
2111 *
2112 * We also update the following backend-global variables:
2113 * TransactionXmin: the oldest xmin of any snapshot in use in the
2114 * current transaction (this is the same as MyProc->xmin).
2115 * RecentXmin: the xmin computed for the most recent snapshot. XIDs
2116 * older than this are known not running any more.
2117 *
2118 * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
2119 * for the benefit of the GlobalVisTest* family of functions.
2120 *
2121 * Note: this function should probably not be called with an argument that's
2122 * not statically allocated (see xip allocation below).
 *
 * The return value is the same snapshot pointer that was passed in. When
 * GetSnapshotDataReuse() returns true, we return right away without
 * recomputing anything here. An ERROR ("out of memory") is raised if the
 * first-call allocation of the xip/subxip arrays fails.
2123 */
/* NOTE(review): the function's signature line is not present in this listing. */
2126{
2127 ProcArrayStruct *arrayP = procArray;
2128 TransactionId *other_xids = ProcGlobal->xids;
2129 TransactionId xmin;
2130 TransactionId xmax;
2131 int count = 0;
2132 int subcount = 0;
2133 bool suboverflowed = false;
2134 FullTransactionId latest_completed;
2135 TransactionId oldestxid;
2136 int mypgxactoff;
2137 TransactionId myxid;
2138 uint64 curXactCompletionCount;
2139
2140 TransactionId replication_slot_xmin = InvalidTransactionId;
2141 TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
2142
2143 Assert(snapshot != NULL);
2144
2145 /*
2146 * Allocating space for maxProcs xids is usually overkill; numProcs would
2147 * be sufficient. But it seems better to do the malloc while not holding
2148 * the lock, so we can't look at numProcs. Likewise, we allocate much
2149 * more subxip storage than is probably needed.
2150 *
2151 * This does open a possibility for avoiding repeated malloc/free: since
2152 * maxProcs does not change at runtime, we can simply reuse the previous
2153 * xip arrays if any. (This relies on the fact that all callers pass
2154 * static SnapshotData structs.)
2155 */
2156 if (snapshot->xip == NULL)
2157 {
2158 /*
2159 * First call for this snapshot. Snapshot is same size whether or not
2160 * we are in recovery, see later comments.
 *
 * NOTE(review): the right-hand sides of the two allocations below are
 * not present in this listing; only the casts and the NULL checks
 * remain.
2161 */
2162 snapshot->xip = (TransactionId *)
2164 if (snapshot->xip == NULL)
2165 ereport(ERROR,
2166 (errcode(ERRCODE_OUT_OF_MEMORY),
2167 errmsg("out of memory")));
2168 Assert(snapshot->subxip == NULL);
2169 snapshot->subxip = (TransactionId *)
2171 if (snapshot->subxip == NULL)
2172 ereport(ERROR,
2173 (errcode(ERRCODE_OUT_OF_MEMORY),
2174 errmsg("out of memory")));
2175 }
2176
2177 /*
2178 * It is sufficient to get shared lock on ProcArrayLock, even if we are
2179 * going to set MyProc->xmin.
2180 */
2181 LWLockAcquire(ProcArrayLock, LW_SHARED);
2182
/* Fast path: nothing further to compute when the reuse check succeeds. */
2183 if (GetSnapshotDataReuse(snapshot))
2184 {
2185 LWLockRelease(ProcArrayLock);
2186 return snapshot;
2187 }
2188
/* Capture the shared-state values we need while the lock is held. */
2189 latest_completed = TransamVariables->latestCompletedXid;
2190 mypgxactoff = MyProc->pgxactoff;
2191 myxid = other_xids[mypgxactoff];
2192 Assert(myxid == MyProc->xid);
2193
2194 oldestxid = TransamVariables->oldestXid;
2195 curXactCompletionCount = TransamVariables->xactCompletionCount;
2196
2197 /* xmax is always latestCompletedXid + 1 */
/* NOTE(review): the "+ 1" step itself is not visible in this listing. */
2198 xmax = XidFromFullTransactionId(latest_completed);
2201
2202 /* initialize xmin calculation with xmax */
2203 xmin = xmax;
2204
2205 /* take own xid into account, saves a check inside the loop */
2206 if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
2207 xmin = myxid;
2208
2210
2211 if (!snapshot->takenDuringRecovery)
2212 {
2213 int numProcs = arrayP->numProcs;
2214 TransactionId *xip = snapshot->xip;
2215 int *pgprocnos = arrayP->pgprocnos;
2216 XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
2217 uint8 *allStatusFlags = ProcGlobal->statusFlags;
2218
2219 /*
2220 * First collect set of pgxactoff/xids that need to be included in the
2221 * snapshot.
2222 */
2223 for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
2224 {
2225 /* Fetch xid just once - see GetNewTransactionId */
2226 TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
2227 uint8 statusFlags;
2228
2229 Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
2230
2231 /*
2232 * If the transaction has no XID assigned, we can skip it; it
2233 * won't have sub-XIDs either.
2234 */
2235 if (likely(xid == InvalidTransactionId))
2236 continue;
2237
2238 /*
2239 * We don't include our own XIDs (if any) in the snapshot. It
2240 * needs to be included in the xmin computation, but we did so
2241 * outside the loop.
2242 */
2243 if (pgxactoff == mypgxactoff)
2244 continue;
2245
2246 /*
2247 * The only way we are able to get here with a non-normal xid is
2248 * during bootstrap - with this backend using
2249 * BootstrapTransactionId. But the above test should filter that
2250 * out.
2251 */
2253
2254 /*
2255 * If the XID is >= xmax, we can skip it; such transactions will
2256 * be treated as running anyway (and any sub-XIDs will also be >=
2257 * xmax).
2258 */
2259 if (!NormalTransactionIdPrecedes(xid, xmax))
2260 continue;
2261
2262 /*
2263 * Skip over backends doing logical decoding which manages xmin
2264 * separately (check below) and ones running LAZY VACUUM.
2265 */
2266 statusFlags = allStatusFlags[pgxactoff];
2267 if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
2268 continue;
2269
/* Track the lowest XID seen among the included transactions. */
2270 if (NormalTransactionIdPrecedes(xid, xmin))
2271 xmin = xid;
2272
2273 /* Add XID to snapshot. */
2274 xip[count++] = xid;
2275
2276 /*
2277 * Save subtransaction XIDs if possible (if we've already
2278 * overflowed, there's no point). Note that the subxact XIDs must
2279 * be later than their parent, so no need to check them against
2280 * xmin. We could filter against xmax, but it seems better not to
2281 * do that much work while holding the ProcArrayLock.
2282 *
2283 * The other backend can add more subxids concurrently, but cannot
2284 * remove any. Hence it's important to fetch nxids just once.
2285 * Should be safe to use memcpy, though. (We needn't worry about
2286 * missing any xids added concurrently, because they must postdate
2287 * xmax.)
2288 *
2289 * Again, our own XIDs are not included in the snapshot.
2290 */
2291 if (!suboverflowed)
2292 {
2293
2294 if (subxidStates[pgxactoff].overflowed)
2295 suboverflowed = true;
2296 else
2297 {
2298 int nsubxids = subxidStates[pgxactoff].count;
2299
2300 if (nsubxids > 0)
2301 {
2302 int pgprocno = pgprocnos[pgxactoff];
2303 PGPROC *proc = &allProcs[pgprocno];
2304
2305 pg_read_barrier(); /* pairs with GetNewTransactionId */
2306
/* Append this backend's cached subxids after those already collected. */
2307 memcpy(snapshot->subxip + subcount,
2308 proc->subxids.xids,
2309 nsubxids * sizeof(TransactionId));
2310 subcount += nsubxids;
2311 }
2312 }
2313 }
2314 }
2315 }
2316 else
2317 {
2318 /*
2319 * We're in hot standby, so get XIDs from KnownAssignedXids.
2320 *
2321 * We store all xids directly into subxip[]. Here's why:
2322 *
2323 * In recovery we don't know which xids are top-level and which are
2324 * subxacts, a design choice that greatly simplifies xid processing.
2325 *
2326 * It seems like we would want to try to put xids into xip[] only, but
2327 * that is fairly small. We would either need to make that bigger or
2328 * to increase the rate at which we WAL-log xid assignment; neither is
2329 * an appealing choice.
2330 *
2331 * We could try to store xids into xip[] first and then into subxip[]
2332 * if there are too many xids. That only works if the snapshot doesn't
2333 * overflow because we do not search subxip[] in that case. A simpler
2334 * way is to just store all xids in the subxip array because this is
2335 * by far the bigger array. We just leave the xip array empty.
2336 *
2337 * Either way we need to change the way XidInMVCCSnapshot() works
2338 * depending upon when the snapshot was taken, or change normal
2339 * snapshot processing so it matches.
2340 *
2341 * Note: It is possible for recovery to end before we finish taking
2342 * the snapshot, and for newly assigned transaction ids to be added to
2343 * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
2344 * those newly added transaction ids would be filtered away, so we
2345 * need not be concerned about them.
2346 */
2347 subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
2348 xmax);
2349
/*
 * NOTE(review): the line preceding this assignment (presumably the
 * condition guarding it) is not present in this listing.
 */
2351 suboverflowed = true;
2352 }
2353
2354
2355 /*
2356 * Fetch into local variable while ProcArrayLock is held - the
2357 * LWLockRelease below is a barrier, ensuring this happens inside the
2358 * lock.
2359 */
2360 replication_slot_xmin = procArray->replication_slot_xmin;
2361 replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
2362
/*
 * Store the computed xmin in MyProc->xmin and in the backend-global
 * TransactionXmin. NOTE(review): the line before this one is not present
 * in this listing, so any condition on this assignment cannot be seen.
 */
2364 MyProc->xmin = TransactionXmin = xmin;
2365
2366 LWLockRelease(ProcArrayLock);
2367
2368 /* maintain state for GlobalVis* */
2369 {
2370 TransactionId def_vis_xid;
2371 TransactionId def_vis_xid_data;
2372 FullTransactionId def_vis_fxid;
2373 FullTransactionId def_vis_fxid_data;
2374 FullTransactionId oldestfxid;
2375
2376 /*
2377 * Converting oldestXid is only safe when xid horizon cannot advance,
2378 * i.e. holding locks. While we don't hold the lock anymore, all the
2379 * necessary data has been gathered with lock held.
2380 */
2381 oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
2382
2383 /* Check whether there's a replication slot requiring an older xmin. */
2384 def_vis_xid_data =
2385 TransactionIdOlder(xmin, replication_slot_xmin);
2386
2387 /*
2388 * Rows in non-shared, non-catalog tables possibly could be vacuumed
2389 * if older than this xid.
2390 */
2391 def_vis_xid = def_vis_xid_data;
2392
2393 /*
2394 * Check whether there's a replication slot requiring an older catalog
2395 * xmin.
2396 */
2397 def_vis_xid =
2398 TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
2399
/* Widen both 32-bit horizons to full XIDs relative to latest_completed. */
2400 def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
2401 def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
2402
2403 /*
2404 * Check if we can increase upper bound. As a previous
2405 * GlobalVisUpdate() might have computed more aggressive values, don't
2406 * overwrite them if so.
 *
 * NOTE(review): the assignment targets and trailing arguments of the
 * statements below are not present in this listing; only fragments of
 * each statement remain.
2407 */
2409 FullTransactionIdNewer(def_vis_fxid,
2412 FullTransactionIdNewer(def_vis_fxid,
2415 FullTransactionIdNewer(def_vis_fxid_data,
2417 /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
2418 if (TransactionIdIsNormal(myxid))
2420 FullXidRelativeTo(latest_completed, myxid);
2421 else
2422 {
2423 GlobalVisTempRels.definitely_needed = latest_completed;
2425 }
2426
2427 /*
2428 * Check if we know that we can initialize or increase the lower
2429 * bound. Currently the only cheap way to do so is to use
2430 * TransamVariables->oldestXid as input.
2431 *
2432 * We should definitely be able to do better. We could e.g. put a
2433 * global lower bound value into TransamVariables.
 *
 * NOTE(review): as above, only the final argument (oldestfxid) of each
 * of the following statements is present in this listing.
2434 */
2437 oldestfxid);
2440 oldestfxid);
2443 oldestfxid);
2444 /* accurate value known */
2446 }
2447
2448 RecentXmin = xmin;
2450
/* Publish the collected results in the caller's snapshot struct. */
2451 snapshot->xmin = xmin;
2452 snapshot->xmax = xmax;
2453 snapshot->xcnt = count;
2454 snapshot->subxcnt = subcount;
2455 snapshot->suboverflowed = suboverflowed;
2456 snapshot->snapXactCompletionCount = curXactCompletionCount;
2457
2458 snapshot->curcid = GetCurrentCommandId(false);
2459
2460 /*
2461 * This is a new snapshot, so set both refcounts to zero, and mark it as
2462 * not copied in persistent memory.
2463 */
2464 snapshot->active_count = 0;
2465 snapshot->regd_count = 0;
2466 snapshot->copied = false;
2467
2468 return snapshot;
2469}
2470
2471/*
2472 * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
2473 *
2474 * This is called when installing a snapshot imported from another
2475 * transaction. To ensure that OldestXmin doesn't go backwards, we must
2476 * check that the source transaction is still running, and we'd better do
2477 * that atomically with installing the new xmin.
2478 *
 * xmin is the value to install; it is assigned to both MyProc->xmin and
 * TransactionXmin. sourcevxid identifies the source transaction by its
 * virtual transaction ID; if it is NULL we return false without taking
 * ProcArrayLock.
 *
2479 * Returns true if successful, false if source xact is no longer running.
 *
 * NOTE(review): the first line of the parameter list is not present in this
 * listing.
2480 */
2481bool
2483 VirtualTransactionId *sourcevxid)
2484{
2485 bool result = false;
2486 ProcArrayStruct *arrayP = procArray;
2487 int index;
2488
2490 if (!sourcevxid)
2491 return false;
2492
2493 /* Get lock so source xact can't end while we're doing this */
2494 LWLockAcquire(ProcArrayLock, LW_SHARED);
2495
2496 /*
2497 * Find the PGPROC entry of the source transaction. (This could use
2498 * GetPGProcByNumber(), unless it's a prepared xact. But this isn't
2499 * performance critical.)
2500 */
2501 for (index = 0; index < arrayP->numProcs; index++)
2502 {
2503 int pgprocno = arrayP->pgprocnos[index];
2504 PGPROC *proc = &allProcs[pgprocno];
2505 int statusFlags = ProcGlobal->statusFlags[index];
2506 TransactionId xid;
2507
2508 /* Ignore procs running LAZY VACUUM */
2509 if (statusFlags & PROC_IN_VACUUM)
2510 continue;
2511
2512 /* We are only interested in the specific virtual transaction. */
2513 if (proc->vxid.procNumber != sourcevxid->procNumber)
2514 continue;
2515 if (proc->vxid.lxid != sourcevxid->localTransactionId)
2516 continue;
2517
2518 /*
2519 * We check the transaction's database ID for paranoia's sake: if it's
2520 * in another DB then its xmin does not cover us. Caller should have
2521 * detected this already, so we just treat any funny cases as
2522 * "transaction not found".
2523 */
2524 if (proc->databaseId != MyDatabaseId)
2525 continue;
2526
2527 /*
2528 * Likewise, let's just make real sure its xmin does cover us.
 *
 * NOTE(review): the second operand of the condition below is not
 * present in this listing.
2529 */
2530 xid = UINT32_ACCESS_ONCE(proc->xmin);
2531 if (!TransactionIdIsNormal(xid) ||
2533 continue;
2534
2535 /*
2536 * We're good. Install the new xmin. As in GetSnapshotData, set
2537 * TransactionXmin too. (Note that because snapmgr.c called
2538 * GetSnapshotData first, we'll be overwriting a valid xmin here, so
2539 * we don't check that.)
2540 */
2541 MyProc->xmin = TransactionXmin = xmin;
2542
/* Only one entry can match, so stop scanning. */
2543 result = true;
2544 break;
2545 }
2546
2547 LWLockRelease(ProcArrayLock);
2548
2549 return result;
2550}
2551
2552/*
2553 * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
2554 *
2555 * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
2556 * PGPROC of the transaction from which we imported the snapshot, rather than
2557 * an XID.
2558 *
2559 * Note that this function also copies statusFlags from the source `proc` in
2560 * order to avoid the case where MyProc's xmin needs to be skipped for
2561 * computing xid horizon.
2562 *
2563 * Returns true if successful, false if source xact is no longer running.
 *
 * NOTE(review): the signature line is not present in this listing; the body
 * refers to the parameters as `xmin` and `proc`.
2564 */
2565bool
2567{
2568 bool result = false;
2569 TransactionId xid;
2570
2572 Assert(proc != NULL);
2573
2574 /*
2575 * Get an exclusive lock so that we can copy statusFlags from source proc.
2576 */
2577 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2578
2579 /*
2580 * Be certain that the referenced PGPROC has an advertised xmin which is
2581 * no later than the one we're installing, so that the system-wide xmin
2582 * can't go backwards. Also, make sure it's running in the same database,
2583 * so that the per-database xmin cannot go backwards.
 *
 * NOTE(review): the final operand of the condition below is not present
 * in this listing.
2584 */
2585 xid = UINT32_ACCESS_ONCE(proc->xmin);
2586 if (proc->databaseId == MyDatabaseId &&
2587 TransactionIdIsNormal(xid) &&
2589 {
2590 /*
2591 * Install xmin and propagate the statusFlags that affect how the
2592 * value is interpreted by vacuum.
2593 */
2594 MyProc->xmin = TransactionXmin = xmin;
/* Replace only the PROC_XMIN_FLAGS bits; all other bits of our flags are kept. */
2595 MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) |
2596 (proc->statusFlags & PROC_XMIN_FLAGS);
2598
2599 result = true;
2600 }
2601
2602 LWLockRelease(ProcArrayLock);
2603
2604 return result;
2605}
2606
2607/*
2608 * GetRunningTransactionData -- returns information about running transactions.
2609 *
2610 * Similar to GetSnapshotData but returns more information. We include
2611 * all PGPROCs with an assigned TransactionId, even VACUUM processes and
2612 * prepared transactions.
2613 *
2614 * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
2615 * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
2616 * array until the caller has WAL-logged this snapshot, and releases the
2617 * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
2618 * lock is released.
2619 *
2620 * The returned data structure is statically allocated; caller should not
2621 * modify it, and must not assume it is valid past the next call.
2622 *
2623 * This is never executed during recovery so there is no need to look at
2624 * KnownAssignedXids.
2625 *
2626 * Dummy PGPROCs from prepared transactions are included, meaning that this
2627 * may return entries with duplicated TransactionId values coming from
2628 * transactions that are finishing their prepare. Nothing is done about
2629 * duplicated entries here, so as not to hold ProcArrayLock longer than necessary.
2630 *
2631 * We don't worry about updating other counters, we want to keep this as
2632 * simple as possible and leave GetSnapshotData() as the primary code for
2633 * that bookkeeping.
2634 *
2635 * Note that if any transaction has overflowed its cached subtransactions
2636 * then there is no real need to include any subtransactions.
 *
 * NOTE(review): the signature line is not present in this listing.
2637 */
2640{
2641 /* result workspace */
2642 static RunningTransactionsData CurrentRunningXactsData;
2643
2644 ProcArrayStruct *arrayP = procArray;
2645 TransactionId *other_xids = ProcGlobal->xids;
2646 RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
2647 TransactionId latestCompletedXid;
2648 TransactionId oldestRunningXid;
2649 TransactionId oldestDatabaseRunningXid;
2650 TransactionId *xids;
2651 int index;
2652 int count;
2653 int subcount;
2654 bool suboverflowed;
2655
2657
2658 /*
2659 * Allocating space for maxProcs xids is usually overkill; numProcs would
2660 * be sufficient. But it seems better to do the malloc while not holding
2661 * the lock, so we can't look at numProcs. Likewise, we allocate much
2662 * more subxip storage than is probably needed.
2663 *
2664 * Should only be allocated in bgwriter, since only ever executed during
2665 * checkpoints.
2666 */
2667 if (CurrentRunningXacts->xids == NULL)
2668 {
2669 /*
2670 * First call
 *
 * NOTE(review): the right-hand side of the allocation below is not
 * present in this listing.
2671 */
2672 CurrentRunningXacts->xids = (TransactionId *)
2674 if (CurrentRunningXacts->xids == NULL)
2675 ereport(ERROR,
2676 (errcode(ERRCODE_OUT_OF_MEMORY),
2677 errmsg("out of memory")));
2678 }
2679
2680 xids = CurrentRunningXacts->xids;
2681
2682 count = subcount = 0;
2683 suboverflowed = false;
2684
2685 /*
2686 * Ensure that no xids enter or leave the procarray while we obtain
2687 * snapshot.
2688 */
2689 LWLockAcquire(ProcArrayLock, LW_SHARED);
2690 LWLockAcquire(XidGenLock, LW_SHARED);
2691
/*
 * NOTE(review): the right-hand sides of the next two assignments are not
 * present in this listing.
 */
2692 latestCompletedXid =
2694 oldestDatabaseRunningXid = oldestRunningXid =
2696
2697 /*
2698 * Spin over procArray collecting all xids
2699 */
2700 for (index = 0; index < arrayP->numProcs; index++)
2701 {
2702 TransactionId xid;
2703
2704 /* Fetch xid just once - see GetNewTransactionId */
2705 xid = UINT32_ACCESS_ONCE(other_xids[index]);
2706
2707 /*
2708 * We don't need to store transactions that don't have a TransactionId
2709 * yet because they will not show as running on a standby server.
2710 */
2711 if (!TransactionIdIsValid(xid))
2712 continue;
2713
2714 /*
2715 * Be careful not to exclude any xids before calculating the values of
2716 * oldestRunningXid and suboverflowed, since these are used to clean
2717 * up transaction information held on standbys.
2718 */
2719 if (TransactionIdPrecedes(xid, oldestRunningXid))
2720 oldestRunningXid = xid;
2721
2722 /*
2723 * Also, update the oldest running xid within the current database. As
2724 * fetching pgprocno and PGPROC could cause cache misses, we do cheap
2725 * TransactionId comparison first.
2726 */
2727 if (TransactionIdPrecedes(xid, oldestDatabaseRunningXid))
2728 {
2729 int pgprocno = arrayP->pgprocnos[index];
2730 PGPROC *proc = &allProcs[pgprocno];
2731
2732 if (proc->databaseId == MyDatabaseId)
2733 oldestDatabaseRunningXid = xid;
2734 }
2735
/*
 * NOTE(review): the line preceding this assignment (presumably the
 * condition guarding it) is not present in this listing.
 */
2737 suboverflowed = true;
2738
2739 /*
2740 * If we wished to exclude xids this would be the right place for it.
2741 * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2742 * but they do during truncation at the end when they get the lock and
2743 * truncate, so it is not much of a problem to include them if they
2744 * are seen and it is cleaner to include them.
2745 */
2746
2747 xids[count++] = xid;
2748 }
2749
2750 /*
2751 * Spin over procArray collecting all subxids, but only if there hasn't
2752 * been a suboverflow.
2753 */
2754 if (!suboverflowed)
2755 {
2756 XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
2757
2758 for (index = 0; index < arrayP->numProcs; index++)
2759 {
2760 int pgprocno = arrayP->pgprocnos[index];
2761 PGPROC *proc = &allProcs[pgprocno];
2762 int nsubxids;
2763
2764 /*
2765 * Save subtransaction XIDs. Other backends can't add or remove
2766 * entries while we're holding XidGenLock.
2767 */
2768 nsubxids = other_subxidstates[index].count;
2769 if (nsubxids > 0)
2770 {
2771 /* barrier not really required, as XidGenLock is held, but ... */
2772 pg_read_barrier(); /* pairs with GetNewTransactionId */
2773
/* Subxids go into the same xids[] array, after the top-level XIDs. */
2774 memcpy(&xids[count], proc->subxids.xids,
2775 nsubxids * sizeof(TransactionId));
2776 count += nsubxids;
2777 subcount += nsubxids;
2778
2779 /*
2780 * Top-level XID of a transaction is always less than any of
2781 * its subxids, so we don't need to check if any of the
2782 * subxids are smaller than oldestRunningXid
2783 */
2784 }
2785 }
2786 }
2787
2788 /*
2789 * It's important *not* to include the limits set by slots here because
2790 * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2791 * were to be included here the initial value could never increase because
2792 * of a circular dependency where slots only increase their limits when
2793 * running xacts increases oldestRunningXid and running xacts only
2794 * increases if slots do.
2795 */
2796
/* count covers top-level XIDs plus subxids, so subtract to get xcnt. */
2797 CurrentRunningXacts->xcnt = count - subcount;
2798 CurrentRunningXacts->subxcnt = subcount;
2799 CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
2801 CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2802 CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;
2803 CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2804
2805 Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2806 Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2807 Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2808
2809 /* We don't release the locks here, the caller is responsible for that */
2810
2811 return CurrentRunningXacts;
2812}
2813
2814/*
2815 * GetOldestActiveTransactionId()
2816 *
2817 * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2818 * all PGPROCs with an assigned TransactionId, even VACUUM processes.
2819 *
2820 * If allDbs is true, we look at all databases, though there is no need to
2821 * include WALSender since this has no effect on hot standby conflicts. If
2822 * allDbs is false, skip processes attached to other databases.
2823 *
2824 * This is never executed during recovery so there is no need to look at
2825 * KnownAssignedXids.
2826 *
2827 * We don't worry about updating other counters, we want to keep this as
2828 * simple as possible and leave GetSnapshotData() as the primary code for
2829 * that bookkeeping.
2830 *
2831 * inCommitOnly indicates getting the oldestActiveXid among the transactions
2832 * in the commit critical section.
 *
 * NOTE(review): the return-type line is not present in this listing.
2833 */
2835GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
2836{
2837 ProcArrayStruct *arrayP = procArray;
2838 TransactionId *other_xids = ProcGlobal->xids;
2839 TransactionId oldestRunningXid;
2840 int index;
2841
2843
2844 /*
2845 * Read nextXid, as the upper bound of what's still active.
2846 *
2847 * Reading a TransactionId is atomic, but we must grab the lock to make
2848 * sure that all XIDs < nextXid are already present in the proc array (or
2849 * have already completed), when we spin over it.
 *
 * NOTE(review): the statement that performs the read between the lock
 * acquire and release is not present in this listing.
2850 */
2851 LWLockAcquire(XidGenLock, LW_SHARED);
2853 LWLockRelease(XidGenLock);
2854
2855 /*
2856 * Spin over procArray looking at each top-level xid.
2857 */
2858 LWLockAcquire(ProcArrayLock, LW_SHARED);
2859 for (index = 0; index < arrayP->numProcs; index++)
2860 {
2861 TransactionId xid;
2862 int pgprocno = arrayP->pgprocnos[index];
2863 PGPROC *proc = &allProcs[pgprocno];
2864
2865 /* Fetch xid just once - see GetNewTransactionId */
2866 xid = UINT32_ACCESS_ONCE(other_xids[index]);
2867
2868 if (!TransactionIdIsNormal(xid))
2869 continue;
2870
/* NOTE(review): the second operand of this condition is not present in this listing. */
2871 if (inCommitOnly &&
2873 continue;
2874
2875 if (!allDbs && proc->databaseId != MyDatabaseId)
2876 continue;
2877
2878 if (TransactionIdPrecedes(xid, oldestRunningXid))
2879 oldestRunningXid = xid;
2880
2881 /*
2882 * Top-level XID of a transaction is always less than any of its
2883 * subxids, so we don't need to check if any of the subxids are
2884 * smaller than oldestRunningXid
2885 */
2886 }
2887 LWLockRelease(ProcArrayLock);
2888
2889 return oldestRunningXid;
2890}
2891
2892/*
2893 * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2894 *
2895 * Returns the oldest xid that we can guarantee not to have been affected by
2896 * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2897 * transaction aborted. Note that the value can (and most of the time will) be
2898 * much more conservative than what really has been affected by vacuum, but we
2899 * currently don't have better data available.
2900 *
2901 * This is useful to initialize the cutoff xid after which a new changeset
2902 * extraction replication slot can start decoding changes.
2903 *
2904 * Must be called with ProcArrayLock held either shared or exclusively,
2905 * although most callers will want to use exclusive mode since it is expected
2906 * that the caller will immediately use the xid to peg the xmin horizon.
 *
 * NOTE(review): the signature line is not present in this listing; the body
 * refers to a parameter named `catalogOnly`.
2907 */
2910{
2911 ProcArrayStruct *arrayP = procArray;
2912 TransactionId oldestSafeXid;
2913 int index;
2914 bool recovery_in_progress = RecoveryInProgress();
2915
2916 Assert(LWLockHeldByMe(ProcArrayLock));
2917
2918 /*
2919 * Acquire XidGenLock, so no transactions can acquire an xid while we're
2920 * running. If no transaction with xid were running concurrently a new xid
2921 * could influence the RecentXmin et al.
2922 *
2923 * We initialize the computation to nextXid since that's guaranteed to be
2924 * a safe, albeit pessimal, value.
 *
 * NOTE(review): the initializing assignment itself is not present in
 * this listing.
2925 */
2926 LWLockAcquire(XidGenLock, LW_SHARED);
2928
2929 /*
2930 * If there's already a slot pegging the xmin horizon, we can start with
2931 * that value, it's guaranteed to be safe since it's computed by this
2932 * routine initially and has been enforced since. We can always use the
2933 * slot's general xmin horizon, but the catalog horizon is only usable
2934 * when only catalog data is going to be looked at.
 *
 * NOTE(review): the leading parts of both conditions below, and the
 * statement controlled by the second one, are not present in this
 * listing.
2935 */
2938 oldestSafeXid))
2939 oldestSafeXid = procArray->replication_slot_xmin;
2940
2941 if (catalogOnly &&
2944 oldestSafeXid))
2946
2947 /*
2948 * If we're not in recovery, we walk over the procarray and collect the
2949 * lowest xid. Since we're called with ProcArrayLock held and have
2950 * acquired XidGenLock, no entries can vanish concurrently, since
2951 * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
2952 * with ProcArrayLock held.
2953 *
2954 * In recovery we can't lower the safe value besides what we've computed
2955 * above, so we'll have to wait a bit longer there. We unfortunately can
2956 * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2957 * machinery can miss values and return an older value than is safe.
2958 */
2959 if (!recovery_in_progress)
2960 {
2961 TransactionId *other_xids = ProcGlobal->xids;
2962
2963 /*
2964 * Spin over procArray collecting min(ProcGlobal->xids[i])
2965 */
2966 for (index = 0; index < arrayP->numProcs; index++)
2967 {
2968 TransactionId xid;
2969
2970 /* Fetch xid just once - see GetNewTransactionId */
2971 xid = UINT32_ACCESS_ONCE(other_xids[index]);
2972
2973 if (!TransactionIdIsNormal(xid))
2974 continue;
2975
2976 if (TransactionIdPrecedes(xid, oldestSafeXid))
2977 oldestSafeXid = xid;
2978 }
2979 }
2980
/* Only XidGenLock is released here; ProcArrayLock stays with the caller. */
2981 LWLockRelease(XidGenLock);
2982
2983 return oldestSafeXid;
2984}
2985
2986/*
2987 * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2988 * delaying checkpoint because they have critical actions in progress.
2989 *
2990 * Constructs an array of VXIDs of transactions that are currently in commit
2991 * critical sections, as shown by having specified delayChkptFlags bits set
2992 * in their PGPROC.
2993 *
2994 * Returns a palloc'd array that should be freed by the caller.
2995 * *nvxids is the number of valid entries.
2996 *
2997 * Note that because backends set or clear delayChkptFlags without holding any
2998 * lock, the result is somewhat indeterminate, but we don't really care. Even
2999 * in a multiprocessor with delayed writes to shared memory, it should be
3000 * certain that setting of delayChkptFlags will propagate to shared memory
3001 * when the backend takes a lock, so we cannot fail to see a virtual xact's
3002 * delayChkptFlags as set if it has already inserted its commit record. Whether
3003 * it takes a little while for clearing of delayChkptFlags to propagate is
3004 * unimportant for correctness.
 *
 * NOTE(review): the signature line is not present in this listing; the body
 * refers to parameters named `nvxids` and `type` (a non-zero flag mask).
3005 */
3008{
3009 VirtualTransactionId *vxids;
3010 ProcArrayStruct *arrayP = procArray;
3011 int count = 0;
3012 int index;
3013
3014 Assert(type != 0);
3015
3016 /* allocate what's certainly enough result space */
3017 vxids = palloc_array(VirtualTransactionId, arrayP->maxProcs);
3018
3019 LWLockAcquire(ProcArrayLock, LW_SHARED);
3020
3021 for (index = 0; index < arrayP->numProcs; index++)
3022 {
3023 int pgprocno = arrayP->pgprocnos[index];
3024 PGPROC *proc = &allProcs[pgprocno];
3025
/* Consider only procs that have at least one of the requested flag bits set. */
3026 if ((proc->delayChkptFlags & type) != 0)
3027 {
3029
/*
 * NOTE(review): the declaration of vxid and the line just before the
 * store below are not present in this listing.
 */
3030 GET_VXID_FROM_PGPROC(vxid, *proc);
3032 vxids[count++] = vxid;
3033 }
3034 }
3035
3036 LWLockRelease(ProcArrayLock);
3037
3038 *nvxids = count;
3039 return vxids;
3040}
3041
3042/*
3043 * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
3044 *
3045 * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
3046 * of the specified VXIDs are still in critical sections of code.
3047 *
3048 * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
3049 * those numbers should be small enough for it not to be a problem.
 *
 * Returns true as soon as one proc with a matching delayChkptFlags bit is
 * found whose VXID appears in vxids[]; false otherwise.
 *
 * NOTE(review): the signature line is not present in this listing; the body
 * refers to parameters named `vxids`, `nvxids` and `type`.
3050 */
3051bool
3053{
3054 bool result = false;
3055 ProcArrayStruct *arrayP = procArray;
3056 int index;
3057
3058 Assert(type != 0);
3059
3060 LWLockAcquire(ProcArrayLock, LW_SHARED);
3061
3062 for (index = 0; index < arrayP->numProcs; index++)
3063 {
3064 int pgprocno = arrayP->pgprocnos[index];
3065 PGPROC *proc = &allProcs[pgprocno];
3067
3068 GET_VXID_FROM_PGPROC(vxid, *proc);
3069
/* NOTE(review): the second operand of this condition is not present in this listing. */
3070 if ((proc->delayChkptFlags & type) != 0 &&
3072 {
3073 int i;
3074
/* Linear search of the caller's list for this proc's VXID. */
3075 for (i = 0; i < nvxids; i++)
3076 {
3077 if (VirtualTransactionIdEquals(vxid, vxids[i]))
3078 {
3079 result = true;
3080 break;
3081 }
3082 }
/* One match is enough; leave the outer loop too. */
3083 if (result)
3084 break;
3085 }
3086 }
3087
3088 LWLockRelease(ProcArrayLock);
3089
3090 return result;
3091}
3092
3093/*
3094 * ProcNumberGetProc -- get a backend's PGPROC given its proc number
3095 *
3096 * The result may be out of date arbitrarily quickly, so the caller
3097 * must be careful about how this information is used. NULL is
3098 * returned if the backend is not active.
 *
 * NULL is also returned when procNumber is outside the range
 * [0, ProcGlobal->allProcCount). No lock is taken here.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not present in this listing.
3099 */
3100PGPROC *
3102{
3103 PGPROC *result;
3104
/* Reject out-of-range proc numbers before indexing. */
3105 if (procNumber < 0 || procNumber >= ProcGlobal->allProcCount)
3106 return NULL;
3107 result = GetPGProcByNumber(procNumber);
3108
/* A pid of zero is treated as "not an active backend". */
3109 if (result->pid == 0)
3110 return NULL;
3111
3112 return result;
3113}
3114
3115/*
3116 * ProcNumberGetTransactionIds -- get a backend's transaction status
3117 *
3118 * Get the xid, xmin, nsubxid and overflow status of the backend. The
3119 * result may be out of date arbitrarily quickly, so the caller must be
3120 * careful about how this information is used.
 *
 * All four output parameters are always written: they are first reset to
 * InvalidTransactionId / 0 / false, and keep those values if procNumber is
 * out of range or the PGPROC's pid is zero.
 *
 * NOTE(review): the first line of the parameter list is not present in this
 * listing.
3121 */
3122void
3124 TransactionId *xmin, int *nsubxid, bool *overflowed)
3125{
3126 PGPROC *proc;
3127
/* Start from "nothing known" so every early exit leaves defined outputs. */
3128 *xid = InvalidTransactionId;
3129 *xmin = InvalidTransactionId;
3130 *nsubxid = 0;
3131 *overflowed = false;
3132
3133 if (procNumber < 0 || procNumber >= ProcGlobal->allProcCount)
3134 return;
3135 proc = GetPGProcByNumber(procNumber);
3136
3137 /* Need to lock out additions/removals of backends */
3138 LWLockAcquire(ProcArrayLock, LW_SHARED);
3139
3140 if (proc->pid != 0)
3141 {
3142 *xid = proc->xid;
3143 *xmin = proc->xmin;
3144 *nsubxid = proc->subxidStatus.count;
3145 *overflowed = proc->subxidStatus.overflowed;
3146 }
3147
3148 LWLockRelease(ProcArrayLock);
3149}
3150
3151/*
3152 * BackendPidGetProc -- get a backend's PGPROC given its PID
3153 *
3154 * Returns NULL if not found. Note that it is up to the caller to be
3155 * sure that the question remains meaningful for long enough for the
3156 * answer to be used ...
3157 */
3158PGPROC *
3160{
3161 PGPROC *result;
3162
3163 if (pid == 0) /* never match dummy PGPROCs */
3164 return NULL;
3165
 /*
  * The search itself is done by BackendPidGetProcWithLock(); this function
  * only wraps it in a shared-mode hold of ProcArrayLock.
  */
3166 LWLockAcquire(ProcArrayLock, LW_SHARED);
3167
3168 result = BackendPidGetProcWithLock(pid);
3169
3170 LWLockRelease(ProcArrayLock);
3171
 /* The pointer is handed back after the lock has been released. */
3172 return result;
3173}
3174
3175/*
3176 * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
3177 *
3178 * Same as above, except caller must be holding ProcArrayLock. The found
3179 * entry, if any, can be assumed to be valid as long as the lock remains held.
3180 */
3181PGPROC *
3183{
3184 PGPROC *result = NULL;
3185 ProcArrayStruct *arrayP = procArray;
3186 int index;
3187
3188 if (pid == 0) /* never match dummy PGPROCs */
3189 return NULL;
3190
 /*
  * Linear scan of the first numProcs proc array entries. pgprocnos[]
  * translates an array position into an index into allProcs[].
  */
3191 for (index = 0; index < arrayP->numProcs; index++)
3192 {
3193 PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
3194
 /* The first entry with a matching pid ends the search. */
3195 if (proc->pid == pid)
3196 {
3197 result = proc;
3198 break;
3199 }
3200 }
3201
 /* Still NULL here if no entry matched. */
3202 return result;
3203}
3204
3205/*
3206 * BackendXidGetPid -- get a backend's pid given its XID
3207 *
3208 * Returns 0 if not found or it's a prepared transaction. Note that
3209 * it is up to the caller to be sure that the question remains
3210 * meaningful for long enough for the answer to be used ...
3211 *
3212 * Only main transaction Ids are considered. This function is mainly
3213 * useful for determining what backend owns a lock.
3214 *
3215 * Beware that not every xact has an XID assigned. However, as long as you
3216 * only call this using an XID found on disk, you're safe.
3217 */
3218int
3220{
3221 int result = 0;
3222 ProcArrayStruct *arrayP = procArray;
3223 TransactionId *other_xids = ProcGlobal->xids;
3224 int index;
3225
3226 if (xid == InvalidTransactionId) /* never match invalid xid */
3227 return 0;
3228
3229 LWLockAcquire(ProcArrayLock, LW_SHARED);
3230
 /*
  * other_xids[] (ProcGlobal->xids) is indexed by the same position as
  * pgprocnos[], so the comparison below needs no PGPROC access. The PGPROC
  * is looked up only for the matching position, to fetch its pid.
  */
3231 for (index = 0; index < arrayP->numProcs; index++)
3232 {
3233 if (other_xids[index] == xid)
3234 {
3235 int pgprocno = arrayP->pgprocnos[index];
3236 PGPROC *proc = &allProcs[pgprocno];
3237
 /* Whatever pid the matching entry holds is returned as-is. */
3238 result = proc->pid;
3239 break;
3240 }
3241 }
3242
3243 LWLockRelease(ProcArrayLock);
3244
 /* Remains 0 if no position held this xid. */
3245 return result;
3246}
3247
3248/*
3249 * IsBackendPid -- is a given pid a running backend
3250 *
3251 * This is not called by the backend, but is called by external modules.
3252 */
3253bool
3255{
 /* True exactly when BackendPidGetProc() finds a PGPROC for this pid. */
3256 return (BackendPidGetProc(pid) != NULL);
3257}
3258
3259
3260/*
3261 * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
3262 *
3263 * The array is palloc'd. The number of valid entries is returned into *nvxids.
3264 *
3265 * The arguments allow filtering the set of VXIDs returned. Our own process
3266 * is always skipped. In addition:
3267 * If limitXmin is not InvalidTransactionId, skip processes with
3268 * xmin > limitXmin.
3269 * If excludeXmin0 is true, skip processes with xmin = 0.
3270 * If allDbs is false, skip processes attached to other databases.
3271 * If excludeVacuum isn't zero, skip processes for which
3272 * (statusFlags & excludeVacuum) is not zero.
3273 *
3274 * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
3275 * allow skipping backends whose oldest live snapshot is no older than
3276 * some snapshot we have. Since we examine the procarray with only shared
3277 * lock, there are race conditions: a backend could set its xmin just after
3278 * we look. Indeed, on multiprocessors with weak memory ordering, the
3279 * other backend could have set its xmin *before* we look. We know however
3280 * that such a backend must have held shared ProcArrayLock overlapping our
3281 * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
3282 * any snapshot the other backend is taking concurrently with our scan cannot
3283 * consider any transactions as still running that we think are committed
3284 * (since backends must hold ProcArrayLock exclusive to commit).
3285 */
3287GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
3288 bool allDbs, int excludeVacuum,
3289 int *nvxids)
3290{
3291 VirtualTransactionId *vxids;
3292 ProcArrayStruct *arrayP = procArray;
3293 int count = 0;
3294 int index;
3295
3296 /* allocate what's certainly enough result space */
3297 vxids = palloc_array(VirtualTransactionId, arrayP->maxProcs);
3298
3299 LWLockAcquire(ProcArrayLock, LW_SHARED);
3300
3301 for (index = 0; index < arrayP->numProcs; index++)
3302 {
3303 int pgprocno = arrayP->pgprocnos[index];
3304 PGPROC *proc = &allProcs[pgprocno];
3305 uint8 statusFlags = ProcGlobal->statusFlags[index];
3306
3307 if (proc == MyProc)
3308 continue;
3309
3310 if (excludeVacuum & statusFlags)
3311 continue;
3312
3313 if (allDbs || proc->databaseId == MyDatabaseId)
3314 {
3315 /* Fetch xmin just once - might change on us */
3316 TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3317
3318 if (excludeXmin0 && !TransactionIdIsValid(pxmin))
3319 continue;
3320
3321 /*
3322 * InvalidTransactionId precedes all other XIDs, so a proc that
3323 * hasn't set xmin yet will not be rejected by this test.
3324 */
3325 if (!TransactionIdIsValid(limitXmin) ||
3326 TransactionIdPrecedesOrEquals(pxmin, limitXmin))
3327 {
3329
3330 GET_VXID_FROM_PGPROC(vxid, *proc);
3332 vxids[count++] = vxid;
3333 }
3334 }
3335 }
3336
3337 LWLockRelease(ProcArrayLock);
3338
3339 *nvxids = count;
3340 return vxids;
3341}
3342
3343/*
3344 * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
3345 *
3346 * Usage is limited to conflict resolution during recovery on standby servers.
3347 * limitXmin is supplied as either a cutoff with snapshotConflictHorizon
3348 * semantics, or InvalidTransactionId in cases where caller cannot accurately
3349 * determine a safe snapshotConflictHorizon value.
3350 *
3351 * If limitXmin is InvalidTransactionId then we want to kill everybody,
3352 * so we're not worried if they have a snapshot or not, nor does it really
3353 * matter what type of lock we hold. Caller must avoid calling here with
3354 * snapshotConflictHorizon style cutoffs that were set to InvalidTransactionId
3355 * during original execution, since that actually indicates that there is
3356 * definitely no need for a recovery conflict (the snapshotConflictHorizon
3357 * convention for InvalidTransactionId values is the opposite of our own!).
3358 *
3359 * All callers that are checking xmins always now supply a valid and useful
3360 * value for limitXmin. The limitXmin is always lower than the lowest
3361 * numbered KnownAssignedXid that is not already a FATAL error. This is
3362 * because we only care about cleanup records that are cleaning up tuple
3363 * versions from committed transactions. In that case they will only occur
3364 * at the point where the record is less than the lowest running xid. That
3365 * allows us to say that if any backend takes a snapshot concurrently with
3366 * us then the conflict assessment made here would never include the snapshot
3367 * that is being derived. So we take LW_SHARED on the ProcArray and allow
3368 * concurrent snapshots when limitXmin is valid. We might think about adding
3369 * Assert(limitXmin < lowest(KnownAssignedXids))
3370 * but that would not be true in the case of FATAL errors lagging in array,
3371 * but we already know those are bogus anyway, so we skip that test.
3372 *
3373 * If dbOid is valid we skip backends attached to other databases.
3374 *
3375 * Be careful to *not* pfree the result from this function. We reuse
3376 * this array sufficiently often that we use malloc for the result.
3377 */
3380{
3381 static VirtualTransactionId *vxids;
3382 ProcArrayStruct *arrayP = procArray;
3383 int count = 0;
3384 int index;
3385
3386 /*
3387 * If first time through, get workspace to remember main XIDs in. We
3388 * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
3389 * result space, remembering room for a terminator.
3390 */
3391 if (vxids == NULL)
3392 {
3393 vxids = (VirtualTransactionId *)
3394 malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
3395 if (vxids == NULL)
3396 ereport(ERROR,
3397 (errcode(ERRCODE_OUT_OF_MEMORY),
3398 errmsg("out of memory")));
3399 }
3400
3401 LWLockAcquire(ProcArrayLock, LW_SHARED);
3402
3403 for (index = 0; index < arrayP->numProcs; index++)
3404 {
3405 int pgprocno = arrayP->pgprocnos[index];
3406 PGPROC *proc = &allProcs[pgprocno];
3407
3408 /* Exclude prepared transactions */
3409 if (proc->pid == 0)
3410 continue;
3411
3412 if (!OidIsValid(dbOid) ||
3413 proc->databaseId == dbOid)
3414 {
3415 /* Fetch xmin just once - can't change on us, but good coding */
3416 TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
3417
3418 /*
3419 * We ignore an invalid pxmin because this means that backend has
3420 * no snapshot currently. We hold a Share lock to avoid contention
3421 * with users taking snapshots. That is not a problem because the
3422 * current xmin is always at least one higher than the latest
3423 * removed xid, so any new snapshot would never conflict with the
3424 * test here.
3425 */
3426 if (!TransactionIdIsValid(limitXmin) ||
3427 (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
3428 {
3430
3431 GET_VXID_FROM_PGPROC(vxid, *proc);
3433 vxids[count++] = vxid;
3434 }
3435 }
3436 }
3437
3438 LWLockRelease(ProcArrayLock);
3439
3440 /* add the terminator */
3441 vxids[count].procNumber = INVALID_PROC_NUMBER;
3443
3444 return vxids;
3445}
3446
3447/*
3448 * CancelVirtualTransaction - used in recovery conflict processing
3449 *
3450 * Returns pid of the process signaled, or 0 if not found.
3451 */
3452pid_t
3454{
 /* Same as SignalVirtualTransaction(), with its third argument fixed to true. */
3455 return SignalVirtualTransaction(vxid, sigmode, true);
3456}
3457
/*
 * Find the proc array entry whose VXID matches vxid and signal it.
 *
 * The scan runs under a shared hold of ProcArrayLock and stops at the first
 * entry whose procNumber and localTransactionId both equal those of vxid.
 * That entry's recoveryConflictPending is set to conflictPending and, if its
 * pid is nonzero, sigmode is sent to it with SendProcSignal().
 *
 * Returns the pid of the matching entry, or 0 if nothing matched or the
 * matching entry had pid 0.
 */
3458pid_t
3460 bool conflictPending)
3461{
3462 ProcArrayStruct *arrayP = procArray;
3463 int index;
3464 pid_t pid = 0;
3465
3466 LWLockAcquire(ProcArrayLock, LW_SHARED);
3467
3468 for (index = 0; index < arrayP->numProcs; index++)
3469 {
3470 int pgprocno = arrayP->pgprocnos[index];
3471 PGPROC *proc = &allProcs[pgprocno];
3472 VirtualTransactionId procvxid;
3473
3474 GET_VXID_FROM_PGPROC(procvxid, *proc);
3475
3476 if (procvxid.procNumber == vxid.procNumber &&
3477 procvxid.localTransactionId == vxid.localTransactionId)
3478 {
 /*
  * NOTE(review): this store happens while ProcArrayLock is held only
  * in shared mode, whereas CancelDBBackends() takes the lock
  * exclusively before the same store -- confirm this is intended.
  */
3479 proc->recoveryConflictPending = conflictPending;
3480 pid = proc->pid;
3481 if (pid != 0)
3482 {
3483 /*
3484 * Kill the pid if it's still here. If not, that's what we
3485 * wanted so ignore any errors.
3486 */
3487 (void) SendProcSignal(pid, sigmode, vxid.procNumber);
3488 }
 /* Only the first matching entry is processed. */
3489 break;
3490 }
3491 }
3492
3493 LWLockRelease(ProcArrayLock);
3494
3495 return pid;
3496}
3497
3498/*
3499 * MinimumActiveBackends --- count backends (other than myself) that are
3500 * in active transactions. Return true if the count is at least the
3501 * minimum threshold passed. This is used as a heuristic to decide if
3502 * a pre-XLOG-flush delay is worthwhile during commit.
3503 *
3504 * Do not count backends that are blocked waiting for locks, since they are
3505 * not going to get to run until someone else commits.
3506 */
3507bool
3509{
3510 ProcArrayStruct *arrayP = procArray;
3511 int count = 0;
3512 int index;
3513
3514 /* Quick short-circuit if no minimum is specified */
3515 if (min == 0)
3516 return true;
3517
3518 /*
3519 * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
3520 * bogus, but since we are only testing fields for zero or nonzero, it
3521 * should be OK. The result is only used for heuristic purposes anyway...
3522 */
3523 for (index = 0; index < arrayP->numProcs; index++)
3524 {
3525 int pgprocno = arrayP->pgprocnos[index];
3526 PGPROC *proc = &allProcs[pgprocno];
3527
3528 /*
3529 * Since we're not holding a lock, need to be prepared to deal with
3530 * garbage, as someone could have incremented numProcs but not yet
3531 * filled the structure.
3532 *
3533 * If someone just decremented numProcs, 'proc' could also point to a
3534 * PGPROC entry that's no longer in the array. It still points to a
3535 * PGPROC struct, though, because freed PGPROC entries just go to the
3536 * free list and are recycled. Its contents are nonsense in that case,
3537 * but that's acceptable for this function.
3538 */
3539 if (pgprocno == -1)
3540 continue; /* do not count deleted entries */
3541 if (proc == MyProc)
3542 continue; /* do not count myself */
3543 if (proc->xid == InvalidTransactionId)
3544 continue; /* do not count if no XID assigned */
3545 if (proc->pid == 0)
3546 continue; /* do not count prepared xacts */
3547 if (proc->waitLock != NULL)
3548 continue; /* do not count if blocked on a lock */
3549 count++;
 /* Stop scanning as soon as the threshold has been reached. */
3550 if (count >= min)
3551 break;
3552 }
3553
 /* True when at least 'min' qualifying entries were seen. */
3554 return count >= min;
3555}
3556
3557/*
3558 * CountDBBackends --- count backends that are using specified database
3559 */
3560int
3562{
3563 ProcArrayStruct *arrayP = procArray;
3564 int count = 0;
3565 int index;
3566
3567 LWLockAcquire(ProcArrayLock, LW_SHARED);
3568
3569 for (index = 0; index < arrayP->numProcs; index++)
3570 {
3571 int pgprocno = arrayP->pgprocnos[index];
3572 PGPROC *proc = &allProcs[pgprocno];
3573
3574 if (proc->pid == 0)
3575 continue; /* do not count prepared xacts */
 /*
  * If databaseid fails OidIsValid(), the database filter is bypassed
  * and every remaining entry is counted.
  */
3576 if (!OidIsValid(databaseid) ||
3577 proc->databaseId == databaseid)
3578 count++;
3579 }
3580
3581 LWLockRelease(ProcArrayLock);
3582
3583 return count;
3584}
3585
3586/*
3587 * CountDBConnections --- counts database backends (only regular backends)
3588 */
3589int
3591{
3592 ProcArrayStruct *arrayP = procArray;
3593 int count = 0;
3594 int index;
3595
3596 LWLockAcquire(ProcArrayLock, LW_SHARED);
3597
3598 for (index = 0; index < arrayP->numProcs; index++)
3599 {
3600 int pgprocno = arrayP->pgprocnos[index];
3601 PGPROC *proc = &allProcs[pgprocno];
3602
3603 if (proc->pid == 0)
3604 continue; /* do not count prepared xacts */
3605 if (!proc->isRegularBackend)
3606 continue; /* count only regular backend processes */
 /*
  * If databaseid fails OidIsValid(), the database filter is bypassed
  * and every remaining regular backend is counted.
  */
3607 if (!OidIsValid(databaseid) ||
3608 proc->databaseId == databaseid)
3609 count++;
3610 }
3611
3612 LWLockRelease(ProcArrayLock);
3613
3614 return count;
3615}
3616
3617/*
3618 * CancelDBBackends --- cancel backends that are using specified database
3619 */
3620void
3621CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
3622{
3623 ProcArrayStruct *arrayP = procArray;
3624 int index;
3625
3626 /* tell all backends to die */
3627 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3628
3629 for (index = 0; index < arrayP->numProcs; index++)
3630 {
3631 int pgprocno = arrayP->pgprocnos[index];
3632 PGPROC *proc = &allProcs[pgprocno];
3633
3634 if (databaseid == InvalidOid || proc->databaseId == databaseid)
3635 {
3636 VirtualTransactionId procvxid;
3637 pid_t pid;
3638
3639 GET_VXID_FROM_PGPROC(procvxid, *proc);
3640
3641 proc->recoveryConflictPending = conflictPending;
3642 pid = proc->pid;
3643 if (pid != 0)
3644 {
3645 /*
3646 * Kill the pid if it's still here. If not, that's what we
3647 * wanted so ignore any errors.
3648 */
3649 (void) SendProcSignal(pid, sigmode, procvxid.procNumber);
3650 }
3651 }
3652 }
3653
3654 LWLockRelease(ProcArrayLock);
3655}
3656
3657/*
3658 * CountUserBackends --- count backends that are used by specified user
3659 * (only regular backends, not any type of background worker)
3660 */
3661int
3663{
3664 ProcArrayStruct *arrayP = procArray;
3665 int count = 0;
3666 int index;
3667
3668 LWLockAcquire(ProcArrayLock, LW_SHARED);
3669
3670 for (index = 0; index < arrayP->numProcs; index++)
3671 {
3672 int pgprocno = arrayP->pgprocnos[index];
3673 PGPROC *proc = &allProcs[pgprocno];
3674
3675 if (proc->pid == 0)
3676 continue; /* do not count prepared xacts */
3677 if (!proc->isRegularBackend)
3678 continue; /* count only regular backend processes */
 /* Unlike the per-database counters, roleid has no wildcard value here. */
3679 if (proc->roleId == roleid)
3680 count++;
3681 }
3682
3683 LWLockRelease(ProcArrayLock);
3684
3685 return count;
3686}
3687
3688/*
3689 * CountOtherDBBackends -- check for other backends running in the given DB
3690 *
3691 * If there are other backends in the DB, we will wait a maximum of 5 seconds
3692 * for them to exit (or 0.3s for testing purposes). Autovacuum backends are
3693 * encouraged to exit early by sending them SIGTERM, but normal user backends
3694 * are just waited for. If background workers connected to this database are
3695 * marked as interruptible, they are terminated.
3696 *
3697 * The current backend is always ignored; it is caller's responsibility to
3698 * check whether the current backend uses the given DB, if it's important.
3699 *
3700 * Returns true if there are (still) other backends in the DB, false if not.
3701 * Also, *nbackends and *nprepared are set to the number of other backends
3702 * and prepared transactions in the DB, respectively.
3703 *
3704 * This function is used to interlock DROP DATABASE and related commands
3705 * against there being any active backends in the target DB --- dropping the
3706 * DB while active backends remain would be a Bad Thing. Note that we cannot
3707 * detect here the possibility of a newly-started backend that is trying to
3708 * connect to the doomed database, so additional interlocking is needed during
3709 * backend startup. The caller should normally hold an exclusive lock on the
3710 * target DB before calling this, which is one reason we mustn't wait
3711 * indefinitely.
3712 */
3713bool
3714CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
3715{
3716 ProcArrayStruct *arrayP = procArray;
3717
3718#define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
3719 int autovac_pids[MAXAUTOVACPIDS];
3720
3721 /*
3722 * Retry up to 50 times with 100ms between attempts (max 5s total). Can be
3723 * reduced to 3 attempts (max 0.3s total) to speed up tests.
3724 */
3725 int ntries = 50;
3726
3727#ifdef USE_INJECTION_POINTS
3728 if (IS_INJECTION_POINT_ATTACHED("procarray-reduce-count"))
3729 ntries = 3;
3730#endif
3731
3732 for (int tries = 0; tries < ntries; tries++)
3733 {
3734 int nautovacs = 0;
3735 bool found = false;
3736 int index;
3737
3739
3740 *nbackends = *nprepared = 0;
3741
3742 LWLockAcquire(ProcArrayLock, LW_SHARED);
3743
3744 for (index = 0; index < arrayP->numProcs; index++)
3745 {
3746 int pgprocno = arrayP->pgprocnos[index];
3747 PGPROC *proc = &allProcs[pgprocno];
3748 uint8 statusFlags = ProcGlobal->statusFlags[index];
3749
3750 if (proc->databaseId != databaseId)
3751 continue;
3752 if (proc == MyProc)
3753 continue;
3754
3755 found = true;
3756
3757 if (proc->pid == 0)
3758 (*nprepared)++;
3759 else
3760 {
3761 (*nbackends)++;
3762 if ((statusFlags & PROC_IS_AUTOVACUUM) &&
3763 nautovacs < MAXAUTOVACPIDS)
3764 autovac_pids[nautovacs++] = proc->pid;
3765 }
3766 }
3767
3768 LWLockRelease(ProcArrayLock);
3769
3770 if (!found)
3771 return false; /* no conflicting backends, so done */
3772
3773 /*
3774 * Send SIGTERM to any conflicting autovacuums before sleeping. We
3775 * postpone this step until after the loop because we don't want to
3776 * hold ProcArrayLock while issuing kill(). We have no idea what might
3777 * block kill() inside the kernel...
3778 */
3779 for (index = 0; index < nautovacs; index++)
3780 (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3781
3782 /*
3783 * Terminate all background workers for this database, if they have
3784 * requested it (BGWORKER_INTERRUPTIBLE).
3785 */
3787
3788 /* sleep, then try again */
3789 pg_usleep(100 * 1000L); /* 100ms */
3790 }
3791
3792 return true; /* timed out, still conflicts */
3793}
3794
3795/*
3796 * Terminate existing connections to the specified database. This routine
3797 * is used by the DROP DATABASE command when user has asked to forcefully
3798 * drop the database.
3799 *
3800 * The current backend is always ignored; it is caller's responsibility to
3801 * check whether the current backend uses the given DB, if it's important.
3802 *
3803 * If the target database has a prepared transaction or permissions checks
3804 * fail for a connection, this fails without terminating anything.
3805 */
3806void
3808{
3809 ProcArrayStruct *arrayP = procArray;
3810 List *pids = NIL;
3811 int nprepared = 0;
3812 int i;
3813
 /*
  * Step 1: under a shared lock, collect the pids of all other entries
  * attached to databaseId, and count the entries that have pid 0.
  */
3814 LWLockAcquire(ProcArrayLock, LW_SHARED);
3815
3816 for (i = 0; i < procArray->numProcs; i++)
3817 {
3818 int pgprocno = arrayP->pgprocnos[i];
3819 PGPROC *proc = &allProcs[pgprocno];
3820
3821 if (proc->databaseId != databaseId)
3822 continue;
3823 if (proc == MyProc)
3824 continue;
3825
3826 if (proc->pid != 0)
3827 pids = lappend_int(pids, proc->pid);
3828 else
3829 nprepared++;
3830 }
3831
3832 LWLockRelease(ProcArrayLock);
3833
 /* Step 2: any pid-0 entry (prepared transaction) aborts the whole request. */
3834 if (nprepared > 0)
3835 ereport(ERROR,
3836 (errcode(ERRCODE_OBJECT_IN_USE),
3837 errmsg("database \"%s\" is being used by prepared transactions",
3838 get_database_name(databaseId)),
3839 errdetail_plural("There is %d prepared transaction using the database.",
3840 "There are %d prepared transactions using the database.",
3841 nprepared,
3842 nprepared)));
3843
3844 if (pids)
3845 {
3846 ListCell *lc;
3847
3848 /*
3849 * Permissions checks relax the pg_terminate_backend checks in two
3850 * ways, both by omitting the !OidIsValid(proc->roleId) check:
3851 *
3852 * - Accept terminating autovacuum workers, since DROP DATABASE
3853 * without FORCE terminates them.
3854 *
3855 * - Accept terminating bgworkers. For bgworker authors, it's
3856 * convenient to be able to recommend FORCE if a worker is blocking
3857 * DROP DATABASE unexpectedly.
3858 *
3859 * Unlike pg_terminate_backend, we don't raise some warnings - like
3860 * "PID %d is not a PostgreSQL server process" - because for us an
3861 * already-finished session is not a problem.
3862 */
 /*
  * Step 3: check permissions for every collected pid before any signal
  * is sent. Each pid is looked up again with BackendPidGetProc(); pids
  * that are no longer found are skipped.
  */
3863 foreach(lc, pids)
3864 {
3865 int pid = lfirst_int(lc);
3866 PGPROC *proc = BackendPidGetProc(pid);
3867
3868 if (proc != NULL)
3869 {
3870 if (superuser_arg(proc->roleId) && !superuser())
3871 ereport(ERROR,
3872 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3873 errmsg("permission denied to terminate process"),
3874 errdetail("Only roles with the %s attribute may terminate processes of roles with the %s attribute.",
3875 "SUPERUSER", "SUPERUSER")));
3876
3877 if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3878 !has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND))
3879 ereport(ERROR,
3880 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3881 errmsg("permission denied to terminate process"),
3882 errdetail("Only roles with privileges of the role whose process is being terminated or with privileges of the \"%s\" role may terminate this process.",
3883 "pg_signal_backend")));
3884 }
3885 }
3886
3887 /*
3888 * There's a race condition here: once we release the ProcArrayLock,
3889 * it's possible for the session to exit before we issue kill. That
3890 * race condition possibility seems too unlikely to worry about. See
3891 * pg_signal_backend.
3892 */
 /*
  * Step 4: send SIGTERM to each pid that BackendPidGetProc() still
  * finds. The return value of kill() is deliberately discarded.
  */
3893 foreach(lc, pids)
3894 {
3895 int pid = lfirst_int(lc);
3896 PGPROC *proc = BackendPidGetProc(pid);
3897
3898 if (proc != NULL)
3899 {
3900 /*
3901 * If we have setsid(), signal the backend's whole process
3902 * group
3903 */
3904#ifdef HAVE_SETSID
3905 (void) kill(-pid, SIGTERM);
3906#else
3907 (void) kill(pid, SIGTERM);
3908#endif
3909 }
3910 }
3911 }
3912}
3913
3914/*
3915 * ProcArraySetReplicationSlotXmin
3916 *
3917 * Install limits to future computations of the xmin horizon to prevent vacuum
3918 * and HOT pruning from removing affected rows still needed by clients with
3919 * replication slots.
3920 */
3921void
3923 bool already_locked)
3924{
3925 Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3926
3927 if (!already_locked)
3928 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3929
3932
3933 if (!already_locked)
3934 LWLockRelease(ProcArrayLock);
3935
3936 elog(DEBUG1, "xmin required by slots: data %u, catalog %u",
3937 xmin, catalog_xmin);
3938}
3939
3940/*
3941 * ProcArrayGetReplicationSlotXmin
3942 *
3943 * Return the current slot xmin limits. That's useful to be able to remove
3944 * data that's older than those limits.
3945 */
3946void
3948 TransactionId *catalog_xmin)
3949{
3950 LWLockAcquire(ProcArrayLock, LW_SHARED);
3951
3952 if (xmin != NULL)
3954
3955 if (catalog_xmin != NULL)
3957
3958 LWLockRelease(ProcArrayLock);
3959}
3960
3961/*
3962 * XidCacheRemoveRunningXids
3963 *
3964 * Remove a bunch of TransactionIds from the list of known-running
3965 * subtransactions for my backend. Both the specified xid and those in
3966 * the xids[] array (of length nxids) are removed from the subxids cache.
3967 * latestXid must be the latest XID among the group.
3968 */
3969void
3971 int nxids, const TransactionId *xids,
3972 TransactionId latestXid)
3973{
3974 int i,
3975 j;
3976 XidCacheStatus *mysubxidstat;
3977
3979
3980 /*
3981 * We must hold ProcArrayLock exclusively in order to remove transactions
3982 * from the PGPROC array. (See src/backend/access/transam/README.) It's
3983 * possible this could be relaxed since we know this routine is only used
3984 * to abort subtransactions, but pending closer analysis we'd best be
3985 * conservative.
3986 *
3987 * Note that we do not have to be careful about memory ordering of our own
3988 * reads wrt. GetNewTransactionId() here - only this process can modify
3989 * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
3990 * careful about our own writes being well ordered.
3991 */
3992 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3993
3994 mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
3995
3996 /*
3997 * Under normal circumstances xid and xids[] will be in increasing order,
3998 * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3999 * behavior when removing a lot of xids.
4000 */
4001 for (i = nxids - 1; i >= 0; i--)
4002 {
4003 TransactionId anxid = xids[i];
4004
4005 for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
4006 {
4007 if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
4008 {
4011 mysubxidstat->count--;
4013 break;
4014 }
4015 }
4016
4017 /*
4018 * Ordinarily we should have found it, unless the cache has
4019 * overflowed. However it's also possible for this routine to be
4020 * invoked multiple times for the same subtransaction, in case of an
4021 * error during AbortSubTransaction. So instead of Assert, emit a
4022 * debug warning.
4023 */
4024 if (j < 0 && !MyProc->subxidStatus.overflowed)
4025 elog(WARNING, "did not find subXID %u in MyProc", anxid);
4026 }
4027
4028 for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
4029 {
4031 {
4034 mysubxidstat->count--;
4036 break;
4037 }
4038 }
4039 /* Ordinarily we should have found it, unless the cache has overflowed */
4040 if (j < 0 && !MyProc->subxidStatus.overflowed)
4041 elog(WARNING, "did not find subXID %u in MyProc", xid);
4042
4043 /* Also advance global latestCompletedXid while holding the lock */
4044 MaintainLatestCompletedXid(latestXid);
4045
4046 /* ... and xactCompletionCount */
4048
4049 LWLockRelease(ProcArrayLock);
4050}
4051
4052#ifdef XIDCACHE_DEBUG
4053
4054/*
4055 * Print stats about effectiveness of XID cache
4056 */
4057static void
4058DisplayXidCache(void)
4059{
4060 fprintf(stderr,
4061 "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
4062 xc_by_recent_xmin,
4063 xc_by_known_xact,
4064 xc_by_my_xact,
4065 xc_by_latest_xid,
4066 xc_by_main_xid,
4067 xc_by_child_xid,
4068 xc_by_known_assigned,
4069 xc_no_overflow,
4070 xc_slow_answer);
4071}
4072#endif /* XIDCACHE_DEBUG */
4073
4074/*
4075 * If rel != NULL, return test state appropriate for relation, otherwise
4076 * return state usable for all relations. The latter may consider XIDs as
4077 * not-yet-visible-to-everyone that a state for a specific relation would
4078 * already consider visible-to-everyone.
4079 *
4080 * This needs to be called while a snapshot is active or registered, otherwise
4081 * there are wraparound and other dangers.
4082 *
4083 * See comment for GlobalVisState for details.
4084 */
4087{
4088 GlobalVisState *state = NULL;
4089
4090 /* XXX: we should assert that a snapshot is pushed or registered */
4092
4093 switch (GlobalVisHorizonKindForRel(rel))
4094 {
4095 case VISHORIZON_SHARED:
4097 break;
4098 case VISHORIZON_CATALOG:
4100 break;
4101 case VISHORIZON_DATA:
4103 break;
4104 case VISHORIZON_TEMP:
4106 break;
4107 }
4108
4109 Assert(FullTransactionIdIsValid(state->definitely_needed) &&
4110 FullTransactionIdIsValid(state->maybe_needed));
4111
4112 return state;
4113}
4114
4115/*
4116 * Return true if it's worth updating the accurate maybe_needed boundary.
4117 *
4118 * As it is somewhat expensive to determine xmin horizons, we don't want to
4119 * repeatedly do so when there is a low likelihood of it being beneficial.
4120 *
4121 * The current heuristic is that we update only if RecentXmin has changed
4122 * since the last update. If the oldest currently running transaction has not
4123 * finished, it is unlikely that recomputing the horizon would be useful.
4124 */
4125static bool
4127{
4128 /* hasn't been updated yet */
4130 return true;
4131
4132 /*
4133 * If the maybe_needed/definitely_needed boundaries are the same, it's
4134 * unlikely to be beneficial to refresh boundaries.
4135 */
4136 if (FullTransactionIdFollowsOrEquals(state->maybe_needed,
4137 state->definitely_needed))
4138 return false;
4139
4140 /* does the last snapshot built have a different xmin? */
4142}
4143
4144static void
4146{
4149 horizons->shared_oldest_nonremovable);
4152 horizons->catalog_oldest_nonremovable);
4155 horizons->data_oldest_nonremovable);
4158 horizons->temp_oldest_nonremovable);
4159
4160 /*
4161 * In longer running transactions it's possible that transactions we
4162 * previously needed to treat as running aren't around anymore. So update
4163 * definitely_needed to not be earlier than maybe_needed.
4164 */
4175
4177}
4178
4179/*
4180 * Update boundaries in GlobalVis{Shared,Catalog,Data,Temp}Rels
4181 * using ComputeXidHorizons().
4182 */
4183static void
4185{
4186 ComputeXidHorizonsResult horizons;
4187
4188 /* updates the horizons as a side-effect */
4189 ComputeXidHorizons(&horizons);
4190}
4191
4192/*
4193 * Return true if no snapshot still considers fxid to be running.
4194 *
4195 * The state passed needs to have been initialized for the relation fxid is
4196 * from (NULL is also OK), otherwise the result may not be correct.
4197 *
4198 * See comment for GlobalVisState for details.
4199 */
4200bool
4202 FullTransactionId fxid)
4203{
4204 /*
4205 * If fxid is older than maybe_needed bound, it definitely is visible to
4206 * everyone.
4207 */
4208 if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
4209 return true;
4210
4211 /*
4212 * If fxid is >= definitely_needed bound, it is very likely to still be
4213 * considered running.
4214 */
4215 if (FullTransactionIdFollowsOrEquals(fxid, state->definitely_needed))
4216 return false;
4217
4218 /*
4219 * fxid is between maybe_needed and definitely_needed, i.e. there might or
4220 * might not exist a snapshot considering fxid running. If it makes sense,
4221 * update boundaries and recheck.
4222 */
4224 {
4226
4227 Assert(FullTransactionIdPrecedes(fxid, state->definitely_needed));
4228
4229 return FullTransactionIdPrecedes(fxid, state->maybe_needed);
4230 }
4231 else
4232 return false;
4233}
4234
4235/*
4236 * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
4237 *
4238 * It is crucial that this only gets called for xids from a source that
4239 * protects against xid wraparounds (e.g. from a table and thus protected by
4240 * relfrozenxid).
4241 */
4242bool
4244{
4245 FullTransactionId fxid;
4246
4247 /*
4248 * Convert 32 bit argument to FullTransactionId. We can do so safely
4249 * because we know the xid has to, at the very least, be between
4250 * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a
4251 * lock to determine either, we can just compare with
4252 * state->definitely_needed, which was based on those values at the time
4253 * the current snapshot was built.
4254 */
4255 fxid = FullXidRelativeTo(state->definitely_needed, xid);
4256
4258}
4259
4260/*
4261 * Convenience wrapper around GlobalVisTestFor() and
4262 * GlobalVisTestIsRemovableFullXid(), see their comments.
4263 */
4264bool
4266{
4268
4269 state = GlobalVisTestFor(rel);
4270
4272}
4273
4274/*
4275 * Convenience wrapper around GlobalVisTestFor() and
4276 * GlobalVisTestIsRemovableXid(), see their comments.
4277 */
4278bool
4280{
4282
4283 state = GlobalVisTestFor(rel);
4284
4286}
4287
4288/*
4289 * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
4290 * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
4291 *
4292 * Be very careful about when to use this function. It can only safely be used
4293 * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
4294 * rel. That can be guaranteed, e.g., if the caller ensures that a snapshot is
4295 * held by the backend and xid is from a table (where vacuum/freezing ensures
4296 * the xid has to be within that range), or if xid is from the procarray and
4297 * prevents xid wraparound that way.
4298 */
4299static inline FullTransactionId
4301{
4303
4305 Assert(TransactionIdIsValid(rel_xid));
4306
4307 /* not guaranteed to find issues, but likely to catch mistakes */
4309
4311 + (int32) (xid - rel_xid));
4312}
4313
4314
4315/* ----------------------------------------------
4316 * KnownAssignedTransactionIds sub-module
4317 * ----------------------------------------------
4318 */
4319
4320/*
4321 * In Hot Standby mode, we maintain a list of transactions that are (or were)
4322 * running on the primary at the current point in WAL. These XIDs must be
4323 * treated as running by standby transactions, even though they are not in
4324 * the standby server's PGPROC array.
4325 *
4326 * We record all XIDs that we know have been assigned. That includes all the
4327 * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
4328 * been assigned. We can deduce the existence of unobserved XIDs because we
4329 * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
4330 * list expands as new XIDs are observed or inferred, and contracts when
4331 * transaction completion records arrive.
4332 *
4333 * During hot standby we do not fret too much about the distinction between
4334 * top-level XIDs and subtransaction XIDs. We store both together in the
4335 * KnownAssignedXids list. In backends, this is copied into snapshots in
4336 * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
4337 * doesn't care about the distinction either. Subtransaction XIDs are
4338 * effectively treated as top-level XIDs and in the typical case pg_subtrans
4339 * links are *not* maintained (which does not affect visibility).
4340 *
4341 * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
4342 * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
4343 * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
4344 * least once per PGPROC_MAX_CACHED_SUBXIDS subXIDs. When we receive one of these
4345 * records, we mark the subXIDs as children of the top XID in pg_subtrans,
4346 * and then remove them from KnownAssignedXids. This prevents overflow of
4347 * KnownAssignedXids and snapshots, at the cost that status checks for these
4348 * subXIDs will take a slower path through TransactionIdIsInProgress().
4349 * This means that KnownAssignedXids is not necessarily complete for subXIDs,
4350 * though it should be complete for top-level XIDs; this is the same situation
4351 * that holds with respect to the PGPROC entries in normal running.
4352 *
4353 * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
4354 * that, similarly to tracking overflow of a PGPROC's subxids array. We do
4355 * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
4356 * As long as that is within the range of interesting XIDs, we have to assume
4357 * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
4358 * on the primary when the 65th subXID arrives, whereas on the standby it occurs
4359 * when the 64th subXID arrives; that is not an error.)
4360 *
4361 * Should a backend on primary somehow disappear before it can write an abort
4362 * record, then we just leave those XIDs in KnownAssignedXids. They actually
4363 * aborted but we think they were running; the distinction is irrelevant
4364 * because either way any changes done by the transaction are not visible to
4365 * backends in the standby. We prune KnownAssignedXids when
4366 * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
4367 * array due to such dead XIDs.
4368 */
4369
4370/*
4371 * RecordKnownAssignedTransactionIds
4372 * Record the given XID in KnownAssignedXids, as well as any preceding
4373 * unobserved XIDs.
4374 *
4375 * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
4376 * associated with a transaction. Must be called for each record after we
4377 * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.
4378 *
4379 * Called during recovery in analogy with and in place of GetNewTransactionId()
4380 */
4381void
4383{
4387
4388 elog(DEBUG4, "record known xact %u latestObservedXid %u",
4389 xid, latestObservedXid);
4390
4391 /*
4392 * When a newly observed xid arrives, it is frequently the case that it is
4393 * *not* the next xid in sequence. When this occurs, we must treat the
4394 * intervening xids as running also.
4395 */
4397 {
4398 TransactionId next_expected_xid;
4399
4400 /*
4401 * Extend subtrans like we do in GetNewTransactionId() during normal
4402 * operation using individual extend steps. Note that we do not need
4403 * to extend clog since its extensions are WAL logged.
4404 *
4405 * This part has to be done regardless of standbyState since we
4406 * immediately start assigning subtransactions to their toplevel
4407 * transactions.
4408 */
4409 next_expected_xid = latestObservedXid;
4410 while (TransactionIdPrecedes(next_expected_xid, xid))
4411 {
4412 TransactionIdAdvance(next_expected_xid);
4413 ExtendSUBTRANS(next_expected_xid);
4414 }
4415 Assert(next_expected_xid == xid);
4416
4417 /*
4418 * If the KnownAssignedXids machinery isn't up yet, there's nothing
4419 * more to do since we don't track assigned xids yet.
4420 */
4422 {
4423 latestObservedXid = xid;
4424 return;
4425 }
4426
4427 /*
4428 * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
4429 */
4430 next_expected_xid = latestObservedXid;
4431 TransactionIdAdvance(next_expected_xid);
4432 KnownAssignedXidsAdd(next_expected_xid, xid, false);
4433
4434 /*
4435 * Now we can advance latestObservedXid
4436 */
4437 latestObservedXid = xid;
4438
4439 /* TransamVariables->nextXid must be beyond any observed xid */
4441 }
4442}
4443
4444/*
4445 * ExpireTreeKnownAssignedTransactionIds
4446 * Remove the given XIDs from KnownAssignedXids.
4447 *
4448 * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
4449 */
4450void
4452 TransactionId *subxids, TransactionId max_xid)
4453{
4455
4456 /*
4457 * Uses same locking as transaction commit
4458 */
4459 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4460
4461 KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
4462
4463 /* As in ProcArrayEndTransaction, advance latestCompletedXid */
4465
4466 /* ... and xactCompletionCount */
4468
4469 LWLockRelease(ProcArrayLock);
4470}
4471
4472/*
4473 * ExpireAllKnownAssignedTransactionIds
4474 * Remove all entries in KnownAssignedXids and reset lastOverflowedXid.
4475 */
4476void
4478{
4479 FullTransactionId latestXid;
4480
4481 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4483
4484 /* Reset latestCompletedXid to nextXid - 1 */
4486 latestXid = TransamVariables->nextXid;
4487 FullTransactionIdRetreat(&latestXid);
4489
4490 /*
4491 * Any transactions that were in-progress were effectively aborted, so
4492 * advance xactCompletionCount.
4493 */
4495
4496 /*
4497 * Reset lastOverflowedXid. Currently, lastOverflowedXid has no use after
4498 * this function has been called, but we reset it anyway for consistency
4499 * with what ExpireOldKnownAssignedTransactionIds() does.
4500 */
4502 LWLockRelease(ProcArrayLock);
4503}
4504
4505/*
4506 * ExpireOldKnownAssignedTransactionIds
4507 * Remove KnownAssignedXids entries preceding the given XID and
4508 * potentially reset lastOverflowedXid.
4509 */
4510void
4512{
4513 TransactionId latestXid;
4514
4515 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4516
4517 /* As in ProcArrayEndTransaction, advance latestCompletedXid */
4518 latestXid = xid;
4519 TransactionIdRetreat(latestXid);
4521
4522 /* ... and xactCompletionCount */
4524
4525 /*
4526 * Reset lastOverflowedXid if we know that all transactions that were
4527 * possibly running are now gone. Not doing so could leave a stale
4528 * lastOverflowedXid value, which would cause snapshots to be marked as
4529 * suboverflowed unnecessarily.
4530 */
4534 LWLockRelease(ProcArrayLock);
4535}
4536
4537/*
4538 * KnownAssignedTransactionIdsIdleMaintenance
4539 * Opportunistically do maintenance work when the startup process
4540 * is about to go idle.
4541 */
4542void
4544{
4546}
4547
4548
4549/*
4550 * Private module functions to manipulate KnownAssignedXids
4551 *
4552 * There are 5 main uses of the KnownAssignedXids data structure:
4553 *
4554 * * backends taking snapshots - all valid XIDs need to be copied out
4555 * * backends seeking to determine presence of a specific XID
4556 * * startup process adding new known-assigned XIDs
4557 * * startup process removing specific XIDs as transactions end
4558 * * startup process pruning array when special WAL records arrive
4559 *
4560 * This data structure is known to be a hot spot during Hot Standby, so we
4561 * go to some lengths to make these operations as efficient and as concurrent
4562 * as possible.
4563 *
4564 * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
4565 * order, to be exact --- to allow binary search for specific XIDs. Note:
4566 * in general TransactionIdPrecedes would not provide a total order, but
4567 * we know that the entries present at any instant should not extend across
4568 * a large enough fraction of XID space to wrap around (the primary would
4569 * shut down for fear of XID wrap long before that happens). So it's OK to
4570 * use TransactionIdPrecedes as a binary-search comparator.
4571 *
4572 * It's cheap to maintain the sortedness during insertions, since new known
4573 * XIDs are always reported in XID order; we just append them at the right.
4574 *
4575 * To keep individual deletions cheap, we need to allow gaps in the array.
4576 * This is implemented by marking array elements as valid or invalid using
4577 * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
4578 * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
4579 * XID entry itself. This preserves the property that the XID entries are
4580 * sorted, so we can do binary searches easily. Periodically we compress
4581 * out the unused entries; that's much cheaper than having to compress the
4582 * array immediately on every deletion.
4583 *
4584 * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
4585 * are those with indexes tail <= i < head; items outside this subscript range
4586 * have unspecified contents. When head reaches the end of the array, we
4587 * force compression of unused entries rather than wrapping around, since
4588 * allowing wraparound would greatly complicate the search logic. We maintain
4589 * an explicit tail pointer so that pruning of old XIDs can be done without
4590 * immediately moving the array contents. In most cases only a small fraction
4591 * of the array contains valid entries at any instant.
4592 *
4593 * Although only the startup process can ever change the KnownAssignedXids
4594 * data structure, we still need interlocking so that standby backends will
4595 * not observe invalid intermediate states. The convention is that backends
4596 * must hold shared ProcArrayLock to examine the array. To remove XIDs from
4597 * the array, the startup process must hold ProcArrayLock exclusively, for
4598 * the usual transactional reasons (compare commit/abort of a transaction
4599 * during normal running). Compressing unused entries out of the array
4600 * likewise requires exclusive lock. To add XIDs to the array, we just insert
4601 * them into slots to the right of the head pointer and then advance the head
4602 * pointer. This doesn't require any lock at all, but on machines with weak
4603 * memory ordering, we need to be careful that other processors see the array
4604 * element changes before they see the head pointer change. We handle this by
4605 * using memory barriers when reading or writing the head/tail pointers (unless
4606 * the caller holds ProcArrayLock exclusively).
4607 *
4608 * Algorithmic analysis:
4609 *
4610 * If we have a maximum of M slots, with N XIDs currently spread across
4611 * S elements then we have N <= S <= M always.
4612 *
4613 * * Adding a new XID is O(1) and needs no lock (unless compression must
4614 * happen)
4615 * * Compressing the array is O(S) and requires exclusive lock
4616 * * Removing an XID is O(logS) and requires exclusive lock
4617 * * Taking a snapshot is O(S) and requires shared lock
4618 * * Checking for an XID is O(logS) and requires shared lock
4619 *
4620 * In comparison, using a hash table for KnownAssignedXids would mean that
4621 * taking snapshots would be O(M). If we can maintain S << M then the
4622 * sorted array technique will deliver significantly faster snapshots.
4623 * If we try to keep S too small then we will spend too much time compressing,
4624 * so there is an optimal point for any workload mix. We use a heuristic to
4625 * decide when to compress the array, though trimming also helps reduce
4626 * frequency of compressing. The heuristic requires us to track the number of
4627 * currently valid XIDs in the array (N). Except in special cases, we'll
4628 * compress when S >= 2N. Bounding S at 2N in turn bounds the time for
4629 * taking a snapshot to be O(N), which it would have to be anyway.
4630 */
4631
4632
4633/*
4634 * Compress KnownAssignedXids by shifting valid data down to the start of the
4635 * array, removing any gaps.
4636 *
4637 * A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
4638 * we do it only if a heuristic indicates it's a good time to do it.
4639 *
4640 * Compression requires holding ProcArrayLock in exclusive mode.
4641 * Caller must pass haveLock = true if it already holds the lock.
4642 */
4643static void
4645{
4646 ProcArrayStruct *pArray = procArray;
4647 int head,
4648 tail,
4649 nelements;
4650 int compress_index;
4651 int i;
4652
4653 /* Counters for compression heuristics */
4654 static unsigned int transactionEndsCounter;
4655 static TimestampTz lastCompressTs;
4656
4657 /* Tuning constants */
4658#define KAX_COMPRESS_FREQUENCY 128 /* in transactions */
4659#define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
4660
4661 /*
4662 * Since only the startup process modifies the head/tail pointers, we
4663 * don't need a lock to read them here.
4664 */
4665 head = pArray->headKnownAssignedXids;
4666 tail = pArray->tailKnownAssignedXids;
4667 nelements = head - tail;
4668
4669 /*
4670 * If we can choose whether to compress, use a heuristic to avoid
4671 * compressing too often or not often enough. "Compress" here simply
4672 * means moving the values to the beginning of the array, so it is not as
4673 * complex or costly as typical data compression algorithms.
4674 */
4675 if (nelements == pArray->numKnownAssignedXids)
4676 {
4677 /*
4678 * When there are no gaps between head and tail, don't bother to
4679 * compress, except in the KAX_NO_SPACE case where we must compress to
4680 * create some space after the head.
4681 */
4682 if (reason != KAX_NO_SPACE)
4683 return;
4684 }
4685 else if (reason == KAX_TRANSACTION_END)
4686 {
4687 /*
4688 * Consider compressing only once every so many commits. Frequency
4689 * determined by benchmarks.
4690 */
4691 if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
4692 return;
4693
4694 /*
4695 * Furthermore, compress only if the used part of the array is at most
4696 * 50% full (see comments above).
4697 */
4698 if (nelements < 2 * pArray->numKnownAssignedXids)
4699 return;
4700 }
4701 else if (reason == KAX_STARTUP_PROCESS_IDLE)
4702 {
4703 /*
4704 * We're about to go idle for lack of new WAL, so we might as well
4705 * compress. But not too often, to avoid ProcArray lock contention
4706 * with readers.
4707 */
4708 if (lastCompressTs != 0)
4709 {
4710 TimestampTz compress_after;
4711
4712 compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
4714 if (GetCurrentTimestamp() < compress_after)
4715 return;
4716 }
4717 }
4718
4719 /* Need to compress, so get the lock if we don't have it. */
4720 if (!haveLock)
4721 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4722
4723 /*
4724 * We compress the array by reading the valid values from tail to head,
4725 * re-aligning data to 0th element.
4726 */
4727 compress_index = 0;
4728 for (i = tail; i < head; i++)
4729 {
4731 {
4732 KnownAssignedXids[compress_index] = KnownAssignedXids[i];
4733 KnownAssignedXidsValid[compress_index] = true;
4734 compress_index++;
4735 }
4736 }
4737 Assert(compress_index == pArray->numKnownAssignedXids);
4738
4739 pArray->tailKnownAssignedXids = 0;
4740 pArray->headKnownAssignedXids = compress_index;
4741
4742 if (!haveLock)
4743 LWLockRelease(ProcArrayLock);
4744
4745 /* Update timestamp for maintenance. No need to hold lock for this. */
4746 lastCompressTs = GetCurrentTimestamp();
4747}
4748
4749/*
4750 * Add xids into KnownAssignedXids at the head of the array.
4751 *
4752 * xids from from_xid to to_xid, inclusive, are added to the array.
4753 *
4754 * If exclusive_lock is true then caller already holds ProcArrayLock in
4755 * exclusive mode, so we need no extra locking here. Else caller holds no
4756 * lock, so we need to be sure we maintain sufficient interlocks against
4757 * concurrent readers. (Only the startup process ever calls this, so no need
4758 * to worry about concurrent writers.)
4759 */
4760static void
4762 bool exclusive_lock)
4763{
4764 ProcArrayStruct *pArray = procArray;
4765 TransactionId next_xid;
4766 int head,
4767 tail;
4768 int nxids;
4769 int i;
4770
4771 Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
4772
4773 /*
4774 * Calculate how many array slots we'll need. Normally this is cheap; in
4775 * the unusual case where the XIDs cross the wrap point, we do it the hard
4776 * way.
4777 */
4778 if (to_xid >= from_xid)
4779 nxids = to_xid - from_xid + 1;
4780 else
4781 {
4782 nxids = 1;
4783 next_xid = from_xid;
4784 while (TransactionIdPrecedes(next_xid, to_xid))
4785 {
4786 nxids++;
4787 TransactionIdAdvance(next_xid);
4788 }
4789 }
4790
4791 /*
4792 * Since only the startup process modifies the head/tail pointers, we
4793 * don't need a lock to read them here.
4794 */
4795 head = pArray->headKnownAssignedXids;
4796 tail = pArray->tailKnownAssignedXids;
4797
4798 Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
4799 Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
4800
4801 /*
4802 * Verify that insertions occur in TransactionId sequence. Note that even
4803 * if the last existing element is marked invalid, it must still have a
4804 * correctly sequenced XID value.
4805 */
4806 if (head > tail &&
4808 {
4810 elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
4811 }
4812
4813 /*
4814 * If our xids won't fit in the remaining space, compress out free space
4815 */
4816 if (head + nxids > pArray->maxKnownAssignedXids)
4817 {
4818 KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
4819
4820 head = pArray->headKnownAssignedXids;
4821 /* note: we no longer care about the tail pointer */
4822
4823 /*
4824 * If it still won't fit then we're out of memory
4825 */
4826 if (head + nxids > pArray->maxKnownAssignedXids)
4827 elog(ERROR, "too many KnownAssignedXids");
4828 }
4829
4830 /* Now we can insert the xids into the space starting at head */
4831 next_xid = from_xid;
4832 for (i = 0; i < nxids; i++)
4833 {
4834 KnownAssignedXids[head] = next_xid;
4835 KnownAssignedXidsValid[head] = true;
4836 TransactionIdAdvance(next_xid);
4837 head++;
4838 }
4839
4840 /* Adjust count of number of valid entries */
4841 pArray->numKnownAssignedXids += nxids;
4842
4843 /*
4844 * Now update the head pointer. We use a write barrier to ensure that
4845 * other processors see the above array updates before they see the head
4846 * pointer change. The barrier isn't required if we're holding
4847 * ProcArrayLock exclusively.
4848 */
4849 if (!exclusive_lock)
4851
4852 pArray->headKnownAssignedXids = head;
4853}
4854
4855/*
4856 * KnownAssignedXidsSearch
4857 *
4858 * Searches KnownAssignedXids for a specific xid and optionally removes it.
4859 * Returns true if it was found, false if not.
4860 *
4861 * Caller must hold ProcArrayLock in shared or exclusive mode.
4862 * Exclusive lock must be held for remove = true.
4863 */
4864static bool
4866{
4867 ProcArrayStruct *pArray = procArray;
4868 int first,
4869 last;
4870 int head;
4871 int tail;
4872 int result_index = -1;
4873
4874 tail = pArray->tailKnownAssignedXids;
4875 head = pArray->headKnownAssignedXids;
4876
4877 /*
4878 * Only the startup process removes entries, so we don't need the read
4879 * barrier in that case.
4880 */
4881 if (!remove)
4882 pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */
4883
4884 /*
4885 * Standard binary search. Note we can ignore the KnownAssignedXidsValid
4886 * array here, since even invalid entries will contain sorted XIDs.
4887 */
4888 first = tail;
4889 last = head - 1;
4890 while (first <= last)
4891 {
4892 int mid_index;
4893 TransactionId mid_xid;
4894
4895 mid_index = (first + last) / 2;
4896 mid_xid = KnownAssignedXids[mid_index];
4897
4898 if (xid == mid_xid)
4899 {
4900 result_index = mid_index;
4901 break;
4902 }
4903 else if (TransactionIdPrecedes(xid, mid_xid))
4904 last = mid_index - 1;
4905 else
4906 first = mid_index + 1;
4907 }
4908
4909 if (result_index < 0)
4910 return false; /* not in array */
4911
4912 if (!KnownAssignedXidsValid[result_index])
4913 return false; /* in array, but invalid */
4914
4915 if (remove)
4916 {
4917 KnownAssignedXidsValid[result_index] = false;
4918
4919 pArray->numKnownAssignedXids--;
4920 Assert(pArray->numKnownAssignedXids >= 0);
4921
4922 /*
4923 * If we're removing the tail element then advance tail pointer over
4924 * any invalid elements. This will speed future searches.
4925 */
4926 if (result_index == tail)
4927 {
4928 tail++;
4929 while (tail < head && !KnownAssignedXidsValid[tail])
4930 tail++;
4931 if (tail >= head)
4932 {
4933 /* Array is empty, so we can reset both pointers */
4934 pArray->headKnownAssignedXids = 0;
4935 pArray->tailKnownAssignedXids = 0;
4936 }
4937 else
4938 {
4939 pArray->tailKnownAssignedXids = tail;
4940 }
4941 }
4942 }
4943
4944 return true;
4945}
4946
4947/*
4948 * Is the specified XID present in KnownAssignedXids[]?
4949 *
4950 * Caller must hold ProcArrayLock in shared or exclusive mode.
4951 */
4952static bool
4954{
4956
4957 return KnownAssignedXidsSearch(xid, false);
4958}
4959
4960/*
4961 * Remove the specified XID from KnownAssignedXids[].
4962 *
4963 * Caller must hold ProcArrayLock in exclusive mode.
4964 */
4965static void
4967{
4969
4970 elog(DEBUG4, "remove KnownAssignedXid %u", xid);
4971
4972 /*
4973 * Note: we cannot consider it an error to remove an XID that's not
4974 * present. We intentionally remove subxact IDs while processing
4975 * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
4976 * removed again when the top-level xact commits or aborts.
4977 *
4978 * It might be possible to track such XIDs to distinguish this case from
4979 * actual errors, but it would be complicated and probably not worth it.
4980 * So, just ignore the search result.
4981 */
4982 (void) KnownAssignedXidsSearch(xid, true);
4983}
4984
4985/*
4986 * KnownAssignedXidsRemoveTree
4987 * Remove xid (if it's not InvalidTransactionId) and all the subxids.
4988 *
4989 * Caller must hold ProcArrayLock in exclusive mode.
4990 */
4991static void
4993 TransactionId *subxids)
4994{
4995 int i;
4996
4997 if (TransactionIdIsValid(xid))
4999
5000 for (i = 0; i < nsubxids; i++)
5001 KnownAssignedXidsRemove(subxids[i]);
5002
5003 /* Opportunistically compress the array */
5005}
5006
5007/*
5008 * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
5009 * then clear the whole table.
5010 *
5011 * Caller must hold ProcArrayLock in exclusive mode.
5012 */
5013static void
5015{
5016 ProcArrayStruct *pArray = procArray;
5017 int count = 0;
5018 int head,
5019 tail,
5020 i;
5021
5022 if (!TransactionIdIsValid(removeXid))
5023 {
5024 elog(DEBUG4, "removing all KnownAssignedXids");
5025 pArray->numKnownAssignedXids = 0;
5026 pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
5027 return;
5028 }
5029
5030 elog(DEBUG4, "prune KnownAssignedXids to %u", removeXid);
5031
5032 /*
5033 * Mark entries invalid starting at the tail. Since array is sorted, we
5034 * can stop as soon as we reach an entry >= removeXid.
5035 */
5036 tail = pArray->tailKnownAssignedXids;
5037 head = pArray->headKnownAssignedXids;
5038
5039 for (i = tail; i < head; i++)
5040 {
5042 {
5043 TransactionId knownXid = KnownAssignedXids[i];
5044
5045 if (TransactionIdFollowsOrEquals(knownXid, removeXid))
5046 break;
5047
5048 if (!StandbyTransactionIdIsPrepared(knownXid))
5049 {
5050 KnownAssignedXidsValid[i] = false;
5051 count++;
5052 }
5053 }
5054 }
5055
5056 pArray->numKnownAssignedXids -= count;
5057 Assert(pArray->numKnownAssignedXids >= 0);
5058
5059 /*
5060 * Advance the tail pointer if we've marked the tail item invalid.
5061 */
5062 for (i = tail; i < head; i++)
5063 {
5065 break;
5066 }
5067 if (i >= head)
5068 {
5069 /* Array is empty, so we can reset both pointers */
5070 pArray->headKnownAssignedXids = 0;
5071 pArray->tailKnownAssignedXids = 0;
5072 }
5073 else
5074 {
5075 pArray->tailKnownAssignedXids = i;
5076 }
5077
5078 /* Opportunistically compress the array */
5080}
5081
5082/*
5083 * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
5084 * We filter out anything >= xmax.
5085 *
5086 * Returns the number of XIDs stored into xarray[]. Caller is responsible
5087 * that array is large enough.
5088 *
5089 * Caller must hold ProcArrayLock in (at least) shared mode.
5090 */
5091static int
5093{
5095
5096 return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
5097}
5098
5099/*
5100 * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
5101 * we reduce *xmin to the lowest xid value seen if not already lower.
5102 *
5103 * Caller must hold ProcArrayLock in (at least) shared mode.
5104 */
5105static int
5107 TransactionId xmax)
5108{
5109 int count = 0;
5110 int head,
5111 tail;
5112 int i;
5113
5114 /*
5115 * Fetch head just once, since it may change while we loop. We can stop
5116 * once we reach the initially seen head, since we are certain that an xid
5117 * cannot enter and then leave the array while we hold ProcArrayLock. We
5118 * might miss newly-added xids, but they should be >= xmax so irrelevant
5119 * anyway.
5120 */
5123
5124 pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */
5125
5126 for (i = tail; i < head; i++)
5127 {
5128 /* Skip any gaps in the array */
5130 {
5131 TransactionId knownXid = KnownAssignedXids[i];
5132
5133 /*
5134 * Update xmin if required. Only the first XID need be checked,
5135 * since the array is sorted.
5136 */
5137 if (count == 0 &&
5138 TransactionIdPrecedes(knownXid, *xmin))
5139 *xmin = knownXid;
5140
5141 /*
5142 * Filter out anything >= xmax, again relying on sorted property
5143 * of array.
5144 */
5145 if (TransactionIdIsValid(xmax) &&
5146 TransactionIdFollowsOrEquals(knownXid, xmax))
5147 break;
5148
5149 /* Add knownXid into output array */
5150 xarray[count++] = knownXid;
5151 }
5152 }
5153
5154 return count;
5155}
5156
5157/*
5158 * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
5159 * if nothing there.
5160 */
5161static TransactionId
5163{
5164 int head,
5165 tail;
5166 int i;
5167
5168 /*
5169 * Fetch head just once, since it may change while we loop.
5170 */
5173
5174 pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */
5175
5176 for (i = tail; i < head; i++)
5177 {
5178 /* Skip any gaps in the array */
5180 return KnownAssignedXids[i];
5181 }
5182
5183 return InvalidTransactionId;
5184}
5185
5186/*
5187 * Display KnownAssignedXids to provide debug trail
5188 *
5189 * Currently this is only called within startup process, so we need no
5190 * special locking.
5191 *
5192 * Note this is pretty expensive, and much of the expense will be incurred
5193 * even if the elog message gets discarded. It's not currently called in
5194 * any performance-critical places, however, so there is no need to optimize it.
5195 */
5196static void
5198{
5199 ProcArrayStruct *pArray = procArray;
5201 int head,
5202 tail,
5203 i;
5204 int nxids = 0;
5205
5206 tail = pArray->tailKnownAssignedXids;
5207 head = pArray->headKnownAssignedXids;
5208
5210
5211 for (i = tail; i < head; i++)
5212 {
5214 {
5215 nxids++;
5216 appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
5217 }
5218 }
5219
5220 elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
5221 nxids,
5222 pArray->numKnownAssignedXids,
5223 pArray->tailKnownAssignedXids,
5224 pArray->headKnownAssignedXids,
5225 buf.data);
5226
5227 pfree(buf.data);
5228}
5229
5230/*
5231 * KnownAssignedXidsReset
5232 * Resets KnownAssignedXids to be empty
5233 */
5234static void
5236{
5237 ProcArrayStruct *pArray = procArray;
5238
5239 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
5240
5241 pArray->numKnownAssignedXids = 0;
5242 pArray->tailKnownAssignedXids = 0;
5243 pArray->headKnownAssignedXids = 0;
5244
5245 LWLockRelease(ProcArrayLock);
5246}
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5284
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:349
#define pg_read_barrier()
Definition: atomics.h:154
#define pg_write_barrier()
Definition: atomics.h:155
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:274
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:237
static uint32 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
Definition: atomics.h:330
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void TerminateBackgroundWorkersForDatabase(Oid databaseId)
Definition: bgworker.c:1420
#define likely(x)
Definition: c.h:417
uint8_t uint8
Definition: c.h:550
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:486
int8_t int8
Definition: c.h:546
int32_t int32
Definition: c.h:548
uint64_t uint64
Definition: c.h:553
#define unlikely(x)
Definition: c.h:418
uint32_t uint32
Definition: c.h:552
uint32 TransactionId
Definition: c.h:672
#define OidIsValid(objectId)
Definition: c.h:794
size_t Size
Definition: c.h:625
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:104
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
int64 TimestampTz
Definition: timestamp.h:39
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1308
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define LOG
Definition: elog.h:31
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
#define DEBUG4
Definition: elog.h:27
#define palloc_array(type, count)
Definition: fe_memutils.h:76
bool IsUnderPostmaster
Definition: globals.c:120
Oid MyDatabaseId
Definition: globals.c:94
Assert(PointerIsAligned(start, uint64))
#define IS_INJECTION_POINT_ATTACHED(name)
int j
Definition: isn.c:78
int i
Definition: isn.c:77
List * lappend_int(List *list, int datum)
Definition: list.c:357
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:69
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
Definition: lock.h:79
#define InvalidLocalTransactionId
Definition: lock.h:67
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition: lock.h:73
char * get_database_name(Oid dbid)
Definition: lsyscache.c:1242
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1981
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1178
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2025
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1898
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1349
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
void pfree(void *pointer)
Definition: mcxt.c:1616
#define AmStartupProcess()
Definition: miscadmin.h:390
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
Oid GetUserId(void)
Definition: miscinit.c:469
static bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
Definition: pg_lfind.h:153
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
#define qsort(a, b, c, d)
Definition: port.h:499
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:335
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:315
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
#define NUM_AUXILIARY_PROCS
Definition: proc.h:463
#define DELAY_CHKPT_IN_COMMIT
Definition: proc.h:137
#define PROC_XMIN_FLAGS
Definition: proc.h:72
#define PROC_AFFECTS_ALL_HORIZONS
Definition: proc.h:62
#define PROC_IN_VACUUM
Definition: proc.h:58
#define GetPGProcByNumber(n)
Definition: proc.h:440
#define GetNumberFromPGProc(proc)
Definition: proc.h:441
#define PROC_VACUUM_STATE_MASK
Definition: proc.h:65
#define PROC_IS_AUTOVACUUM
Definition: proc.h:57
KAXCompressReason
Definition: procarray.c:263
@ KAX_PRUNE
Definition: procarray.c:265
@ KAX_NO_SPACE
Definition: procarray.c:264
@ KAX_TRANSACTION_END
Definition: procarray.c:266
@ KAX_STARTUP_PROCESS_IDLE
Definition: procarray.c:267
static GlobalVisState GlobalVisDataRels
Definition: procarray.c:301
bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid)
Definition: procarray.c:4201
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1955
#define TOTAL_MAX_CACHED_SUBXIDS
static GlobalVisState GlobalVisSharedRels
Definition: procarray.c:299
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3947
static GlobalVisState GlobalVisCatalogRels
Definition: procarray.c:300
VirtualTransactionId * GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids)
Definition: procarray.c:3287
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4243
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4265
static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
Definition: procarray.c:4644
pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3459
Size ProcArrayShmemSize(void)
Definition: procarray.c:378
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2909
void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid)
Definition: procarray.c:3970
static FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
Definition: procarray.c:4300
bool MinimumActiveBackends(int min)
Definition: procarray.c:3508
void TerminateOtherDBBackends(Oid databaseId)
Definition: procarray.c:3807
#define xc_no_overflow_inc()
Definition: procarray.c:344
static TransactionId standbySnapshotPendingXmin
Definition: procarray.c:292
void ExpireAllKnownAssignedTransactionIds(void)
Definition: procarray.c:4477
#define UINT32_ACCESS_ONCE(var)
Definition: procarray.c:70
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2639
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:4992
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax)
Definition: procarray.c:5106
#define xc_by_recent_xmin_inc()
Definition: procarray.c:337
void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:669
void ProcNumberGetTransactionIds(ProcNumber procNumber, TransactionId *xid, TransactionId *xmin, int *nsubxid, bool *overflowed)
Definition: procarray.c:3123
static PGPROC * allProcs
Definition: procarray.c:273
void RecordKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4382
static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
Definition: procarray.c:5092
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1984
static TransactionId latestObservedXid
Definition: procarray.c:285
static ProcArrayStruct * procArray
Definition: procarray.c:271
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:2030
int CountDBConnections(Oid databaseid)
Definition: procarray.c:3590
static GlobalVisState GlobalVisTempRels
Definition: procarray.c:302
#define xc_by_my_xact_inc()
Definition: procarray.c:339
#define xc_by_known_assigned_inc()
Definition: procarray.c:343
struct ProcArrayStruct ProcArrayStruct
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
Definition: procarray.c:3621
#define PROCARRAY_MAXPROCS
void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:1997
static bool GlobalVisTestShouldUpdate(GlobalVisState *state)
Definition: procarray.c:4126
static void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:733
static void KnownAssignedXidsRemovePreceding(TransactionId removeXid)
Definition: procarray.c:5014
void ProcArrayAdd(PGPROC *proc)
Definition: procarray.c:470
struct ComputeXidHorizonsResult ComputeXidHorizonsResult
static TransactionId * KnownAssignedXids
Definition: procarray.c:283
#define xc_by_child_xid_inc()
Definition: procarray.c:342
pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
Definition: procarray.c:3453
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2125
static bool * KnownAssignedXidsValid
Definition: procarray.c:284
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3052
static void KnownAssignedXidsRemove(TransactionId xid)
Definition: procarray.c:4966
void KnownAssignedTransactionIdsIdleMaintenance(void)
Definition: procarray.c:4543
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
Definition: procarray.c:4145
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2019
int CountDBBackends(Oid databaseid)
Definition: procarray.c:3561
PGPROC * BackendPidGetProcWithLock(int pid)
Definition: procarray.c:3182
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4279
#define MAXAUTOVACPIDS
PGPROC * BackendPidGetProc(int pid)
Definition: procarray.c:3159
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2566
PGPROC * ProcNumberGetProc(ProcNumber procNumber)
Definition: procarray.c:3101
#define KAX_COMPRESS_FREQUENCY
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4086
static TransactionId KnownAssignedXidsGetOldestXmin(void)
Definition: procarray.c:5162
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1056
void ProcArrayClearTransaction(PGPROC *proc)
Definition: procarray.c:909
int CountUserBackends(Oid roleid)
Definition: procarray.c:3662
static TransactionId ComputeXidHorizonsResultLastXmin
Definition: procarray.c:309
static void GlobalVisUpdate(void)
Definition: procarray.c:4184
#define xc_slow_answer_inc()
Definition: procarray.c:345
static void KnownAssignedXidsDisplay(int trace_level)
Definition: procarray.c:5197
#define xc_by_main_xid_inc()
Definition: procarray.c:341
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid)
Definition: procarray.c:991
static void ComputeXidHorizons(ComputeXidHorizonsResult *h)
Definition: procarray.c:1685
void ProcArrayApplyXidAssignment(TransactionId topxid, int nsubxids, TransactionId *subxids)
Definition: procarray.c:1320
static bool KnownAssignedXidExists(TransactionId xid)
Definition: procarray.c:4953
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition: procarray.c:2835
void ProcArrayShmemInit(void)
Definition: procarray.c:420
bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
Definition: procarray.c:3714
GlobalVisHorizonKind
Definition: procarray.c:252
@ VISHORIZON_SHARED
Definition: procarray.c:253
@ VISHORIZON_DATA
Definition: procarray.c:255
@ VISHORIZON_CATALOG
Definition: procarray.c:254
@ VISHORIZON_TEMP
Definition: procarray.c:256
int BackendXidGetPid(TransactionId xid)
Definition: procarray.c:3219
#define xc_by_latest_xid_inc()
Definition: procarray.c:340
bool IsBackendPid(int pid)
Definition: procarray.c:3254
#define xc_by_known_xact_inc()
Definition: procarray.c:338
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove)
Definition: procarray.c:4865
static void KnownAssignedXidsReset(void)
Definition: procarray.c:5235
static GlobalVisHorizonKind GlobalVisHorizonKindForRel(Relation rel)
Definition: procarray.c:1921
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3922
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1025
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:567
#define KAX_COMPRESS_IDLE_INTERVAL
VirtualTransactionId * GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
Definition: procarray.c:3379
static void MaintainLatestCompletedXid(TransactionId latestXid)
Definition: procarray.c:969
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
Definition: procarray.c:794
void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids, TransactionId max_xid)
Definition: procarray.c:4451
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3007
static TransactionId cachedXidIsNotInProgress
Definition: procarray.c:278
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2482
static bool GetSnapshotDataReuse(Snapshot snapshot)
Definition: procarray.c:2045
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool exclusive_lock)
Definition: procarray.c:4761
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1404
void ExpireOldKnownAssignedTransactionIds(TransactionId xid)
Definition: procarray.c:4511
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:284
ProcSignalReason
Definition: procsignal.h:31
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:658
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:694
Size add_size(Size s1, Size s2)
Definition: shmem.c:495
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:389
void pg_usleep(long microsec)
Definition: signal.c:53
TransactionId RecentXmin
Definition: snapmgr.c:160
TransactionId TransactionXmin
Definition: snapmgr.c:159
#define malloc(a)
PGPROC * MyProc
Definition: proc.c:67
PROC_HDR * ProcGlobal
Definition: proc.c:79
void StandbyReleaseOldLocks(TransactionId oldxid)
Definition: standby.c:1130
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
@ SUBXIDS_MISSING
Definition: standby.h:81
@ SUBXIDS_IN_ARRAY
Definition: standby.h:80
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
TransactionId slot_catalog_xmin
Definition: procarray.c:194
TransactionId data_oldest_nonremovable
Definition: procarray.c:239
TransactionId temp_oldest_nonremovable
Definition: procarray.c:245
TransactionId shared_oldest_nonremovable
Definition: procarray.c:216
TransactionId oldest_considered_running
Definition: procarray.c:207
TransactionId slot_xmin
Definition: procarray.c:193
FullTransactionId latest_completed
Definition: procarray.c:187
TransactionId catalog_oldest_nonremovable
Definition: procarray.c:233
TransactionId shared_oldest_nonremovable_raw
Definition: procarray.c:227
FullTransactionId definitely_needed
Definition: procarray.c:172
FullTransactionId maybe_needed
Definition: procarray.c:175
Definition: pg_list.h:54
Definition: proc.h:179
bool isRegularBackend
Definition: proc.h:230
TransactionId xmin
Definition: proc.h:194
bool procArrayGroupMember
Definition: proc.h:286
LocalTransactionId lxid
Definition: proc.h:217
pg_atomic_uint32 procArrayGroupNext
Definition: proc.h:288
struct PGPROC::@130 vxid
uint8 statusFlags
Definition: proc.h:259
bool recoveryConflictPending
Definition: proc.h:237
Oid databaseId
Definition: proc.h:224
ProcNumber procNumber
Definition: proc.h:212
int pid
Definition: proc.h:199
int pgxactoff
Definition: proc.h:201
XidCacheStatus subxidStatus
Definition: proc.h:280
LOCK * waitLock
Definition: proc.h:249
TransactionId xid
Definition: proc.h:189
struct XidCache subxids
Definition: proc.h:282
int delayChkptFlags
Definition: proc.h:257
TransactionId procArrayGroupMemberXid
Definition: proc.h:294
PGSemaphore sem
Definition: proc.h:183
Oid roleId
Definition: proc.h:225
Definition: proc.h:386
uint8 * statusFlags
Definition: proc.h:403
XidCacheStatus * subxidStates
Definition: proc.h:397
PGPROC * allProcs
Definition: proc.h:388
TransactionId * xids
Definition: proc.h:391
pg_atomic_uint32 procArrayGroupFirst
Definition: proc.h:416
uint32 allProcCount
Definition: proc.h:406
TransactionId replication_slot_xmin
Definition: procarray.c:96
int maxKnownAssignedXids
Definition: procarray.c:81
TransactionId replication_slot_catalog_xmin
Definition: procarray.c:98
int numKnownAssignedXids
Definition: procarray.c:82
int pgprocnos[FLEXIBLE_ARRAY_MEMBER]
Definition: procarray.c:101
TransactionId lastOverflowedXid
Definition: procarray.c:93
int tailKnownAssignedXids
Definition: procarray.c:83
int headKnownAssignedXids
Definition: procarray.c:84
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId oldestDatabaseRunningXid
Definition: standby.h:93
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
TransactionId xmin
Definition: snapshot.h:153
int32 subxcnt
Definition: snapshot.h:177
bool copied
Definition: snapshot.h:181
uint32 regd_count
Definition: snapshot.h:201
uint32 active_count
Definition: snapshot.h:200
CommandId curcid
Definition: snapshot.h:183
uint32 xcnt
Definition: snapshot.h:165
TransactionId * subxip
Definition: snapshot.h:176
uint64 snapXactCompletionCount
Definition: snapshot.h:209
TransactionId xmax
Definition: snapshot.h:154
TransactionId * xip
Definition: snapshot.h:164
bool suboverflowed
Definition: snapshot.h:178
bool takenDuringRecovery
Definition: snapshot.h:180
FullTransactionId latestCompletedXid
Definition: transam.h:238
FullTransactionId nextXid
Definition: transam.h:220
uint64 xactCompletionCount
Definition: transam.h:248
TransactionId oldestXid
Definition: transam.h:222
LocalTransactionId localTransactionId
Definition: lock.h:64
ProcNumber procNumber
Definition: lock.h:63
bool overflowed
Definition: proc.h:46
uint8 count
Definition: proc.h:44
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition: proc.h:51
Definition: type.h:96
Definition: regguts.h:323
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition: subtrans.c:84
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:162
void ExtendSUBTRANS(TransactionId newestXact)
Definition: subtrans.c:353
bool superuser_arg(Oid roleid)
Definition: superuser.c:56
bool superuser(void)
Definition: superuser.c:46
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition: transam.c:281
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.h:297
#define FullTransactionIdIsNormal(x)
Definition: transam.h:58
static FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
Definition: transam.h:422
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define InvalidTransactionId
Definition: transam.h:31
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define U64FromFullTransactionId(x)
Definition: transam.h:49
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:282
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition: transam.h:81
#define FullTransactionIdFollowsOrEquals(a, b)
Definition: transam.h:54
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:312
#define AssertTransactionIdInAllowableRange(xid)
Definition: transam.h:363
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:147
#define XidFromFullTransactionId(x)
Definition: transam.h:48
static void FullTransactionIdAdvance(FullTransactionId *dest)
Definition: transam.h:128
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define TransactionIdAdvance(dest)
Definition: transam.h:91
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
#define FullTransactionIdIsValid(x)
Definition: transam.h:55
static TransactionId TransactionIdOlder(TransactionId a, TransactionId b)
Definition: transam.h:396
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.h:263
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition: twophase.c:1467
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:85
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:304
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85
const char * type
#define kill(pid, sig)
Definition: win32_port.h:490
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:942
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:830
int xidLogicalComparator(const void *arg1, const void *arg2)
Definition: xid.c:169
bool RecoveryInProgress(void)
Definition: xlog.c:6461
bool EnableHotStandby
Definition: xlog.c:124
HotStandbyState standbyState
Definition: xlogutils.c:53
@ STANDBY_SNAPSHOT_READY
Definition: xlogutils.h:55
@ STANDBY_SNAPSHOT_PENDING
Definition: xlogutils.h:54
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53