PostgreSQL Source Code git master
Loading...
Searching...
No Matches
snapmgr.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * snapmgr.c
4 * PostgreSQL snapshot manager
5 *
6 * The following functions return an MVCC snapshot that can be used in tuple
7 * visibility checks:
8 *
9 * - GetTransactionSnapshot
10 * - GetLatestSnapshot
11 * - GetCatalogSnapshot
12 * - GetNonHistoricCatalogSnapshot
13 *
14 * Each of these functions returns a reference to a statically allocated
15 * snapshot. The statically allocated snapshot is subject to change on any
16 * snapshot-related function call, and should not be used directly. Instead,
17 * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived
18 * copy and use that.
19 *
20 * We keep track of snapshots in two ways: those "registered" by resowner.c,
21 * and the "active snapshot" stack. All snapshots in either of them live in
22 * persistent memory. When a snapshot is no longer in any of these lists
23 * (tracked by separate refcounts on each snapshot), its memory can be freed.
24 *
25 * In addition to the above-mentioned MVCC snapshots, there are some special
26 * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots. They can
27 * only be used in limited contexts and cannot be registered or pushed to the
28 * active stack.
29 *
30 * ActiveSnapshot stack
31 * --------------------
32 *
33 * Most visibility checks use the current "active snapshot" returned by
34 * GetActiveSnapshot(). When running normal queries, the active snapshot is
35 * set when query execution begins based on the transaction isolation level.
36 *
37 * The active snapshot is tracked in a stack so that the currently active one
38 * is at the top of the stack. It mirrors the process call stack: whenever we
39 * recurse or switch context to fetch rows from a different portal for
40 * example, the appropriate snapshot is pushed to become the active snapshot,
41 * and popped on return. Once upon a time, ActiveSnapshot was just a global
42 * variable that was saved and restored similar to CurrentMemoryContext, but
43 * nowadays it's managed as a separate data structure so that we can keep
44 * track of which snapshots are in use and reset MyProc->xmin when there is no
45 * active snapshot.
46 *
47 * However, there are a couple of exceptions where the active snapshot stack
48 * does not strictly mirror the call stack:
49 *
50 * - VACUUM and a few other utility commands manage their own transactions,
51 * which take their own snapshots. They are called with an active snapshot
52 * set, like most utility commands, but they pop the active snapshot that
53 * was pushed by the caller. PortalRunUtility knows about the possibility
54 * that the snapshot it pushed is no longer active on return.
55 *
56 * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the
57 * active snapshot stack is destroyed, and re-established later when
58 * subsequent statements in the procedure are executed. There are many
59 * limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such
60 * limitation is that all the snapshots on the active snapshot stack are
61 * known to portals that are being executed, which makes it safe to reset
62 * the stack. See EnsurePortalSnapshotExists().
63 *
64 * Registered snapshots
65 * --------------------
66 *
67 * In addition to snapshots pushed to the active snapshot stack, a snapshot
68 * can be registered with a resource owner.
69 *
70 * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
71 * regd_count and list it in RegisteredSnapshots, but this reference is not
72 * tracked by a resource owner. We used to use the TopTransactionResourceOwner
73 * to track this snapshot reference, but that introduces logical circularity
74 * and thus makes it impossible to clean up in a sane fashion. It's better to
75 * handle this reference as an internally-tracked registration, so that this
76 * module is entirely lower-level than ResourceOwners.
77 *
78 * Likewise, any snapshots that have been exported by pg_export_snapshot
79 * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
80 * tracked by any resource owner.
81 *
82 * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
83 * is valid, but is not tracked by any resource owner.
84 *
85 * The same is true for historic snapshots used during logical decoding;
86 * their lifetime is managed separately (as they live longer than one xact.c
87 * transaction).
88 *
89 * These arrangements let us reset MyProc->xmin when there are no snapshots
90 * referenced by this transaction, and advance it when the one with oldest
91 * Xmin is no longer referenced. For simplicity, however, only registered
92 * snapshots, not active snapshots, participate in tracking which one is oldest;
93 * we don't try to change MyProc->xmin except when the active-snapshot
94 * stack is empty.
95 *
96 *
97 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
98 * Portions Copyright (c) 1994, Regents of the University of California
99 *
100 * IDENTIFICATION
101 * src/backend/utils/time/snapmgr.c
102 *
103 *-------------------------------------------------------------------------
104 */
105#include "postgres.h"
106
107#include <sys/stat.h>
108#include <unistd.h>
109
110#include "access/subtrans.h"
111#include "access/transam.h"
112#include "access/xact.h"
113#include "datatype/timestamp.h"
114#include "lib/pairingheap.h"
115#include "miscadmin.h"
116#include "port/pg_lfind.h"
117#include "storage/fd.h"
118#include "storage/predicate.h"
119#include "storage/proc.h"
120#include "storage/procarray.h"
121#include "utils/builtins.h"
123#include "utils/memutils.h"
124#include "utils/resowner.h"
125#include "utils/snapmgr.h"
126#include "utils/syscache.h"
127
128
129/*
130 * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
131 * mode, and to the latest one taken in a read-committed transaction.
132 * SecondarySnapshot is a snapshot that's always up-to-date as of the current
133 * instant, even in transaction-snapshot mode. It should only be used for
134 * special-purpose code (say, RI checking.) CatalogSnapshot points to an
135 * MVCC snapshot intended to be used for catalog scans; we must invalidate it
136 * whenever a system catalog change occurs.
137 *
138 * These SnapshotData structs are static to simplify memory allocation
139 * (see the hack in GetSnapshotData to avoid repeated malloc/free).
140 */
147
148/* Pointers to valid snapshots */
153
154/*
155 * These are updated by GetSnapshotData. We initialize them this way
156 * for the convenience of TransactionIdIsInProgress: even in bootstrap
157 * mode, we don't want it to say that BootstrapTransactionId is in progress.
158 */
161
162/* (table, ctid) => (cmin, cmax) mapping during timetravel */
164
165/*
166 * Elements of the active snapshot stack.
167 *
168 * Each element here accounts for exactly one active_count on SnapshotData.
169 *
170 * NB: the code assumes that elements in this list are in non-increasing
171 * order of as_level; also, the list must be NULL-terminated.
172 */
179
180/* Top of the stack of active snapshots */
182
183/*
184 * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
185 * quickly find the one with lowest xmin, to advance our MyProc->xmin.
186 */
187static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
188 void *arg);
189
191
192/* first GetTransactionSnapshot call in a transaction? */
193bool FirstSnapshotSet = false;
194
195/*
196 * Remember the serializable transaction snapshot, if any. We cannot trust
197 * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
198 * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
199 */
201
202/* Define pathname of exported-snapshot files */
203#define SNAPSHOT_EXPORT_DIR "pg_snapshots"
204
205/* Structure holding info about exported snapshot. */
211
212/* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
214
215/* Prototypes for local functions */
216static Snapshot CopySnapshot(Snapshot snapshot);
217static void UnregisterSnapshotNoOwner(Snapshot snapshot);
218static void FreeSnapshot(Snapshot snapshot);
219static void SnapshotResetXmin(void);
220
221/* ResourceOwner callbacks to track snapshot references */
222static void ResOwnerReleaseSnapshot(Datum res);
223
225{
226 .name = "snapshot reference",
227 .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
228 .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
229 .ReleaseResource = ResOwnerReleaseSnapshot,
230 .DebugPrint = NULL /* the default message is fine */
231};
232
233/* Convenience wrappers over ResourceOwnerRemember/Forget */
234static inline void
239static inline void
244
245/*
246 * Snapshot fields to be serialized.
247 *
248 * Only these fields need to be sent to the cooperating backend; the
249 * remaining ones can (and must) be set by the receiver upon restore.
250 */
261
262/*
263 * GetTransactionSnapshot
264 * Get the appropriate snapshot for a new query in a transaction.
265 *
266 * Note that the return value points at static storage that will be modified
267 * by future calls and by CommandCounterIncrement(). Callers must call
268 * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
269 * any other non-trivial work that could invalidate it.
270 */
273{
274 /*
275 * Return historic snapshot if doing logical decoding.
276 *
277 * Historic snapshots are only usable for catalog access, not for
278 * general-purpose queries. The caller is responsible for ensuring that
279 * the snapshot is used correctly! (PostgreSQL code never calls this
280 * during logical decoding, but extensions can do it.)
281 */
283 {
284 /*
285 * We'll never need a non-historic transaction snapshot in this
286 * (sub-)transaction, so there's no need to be careful to set one up
287 * for later calls to GetTransactionSnapshot().
288 */
290 return HistoricSnapshot;
291 }
292
293 /* First call in transaction? */
294 if (!FirstSnapshotSet)
295 {
296 /*
297 * Don't allow catalog snapshot to be older than xact snapshot. Must
298 * do this first to allow the empty-heap Assert to succeed.
299 */
301
304
305 if (IsInParallelMode())
306 elog(ERROR,
307 "cannot take query snapshot during a parallel operation");
308
309 /*
310 * In transaction-snapshot mode, the first snapshot must live until
311 * end of xact regardless of what the caller does with it, so we must
312 * make a copy of it rather than returning CurrentSnapshotData
313 * directly. Furthermore, if we're running in serializable mode,
314 * predicate.c needs to wrap the snapshot fetch in its own processing.
315 */
317 {
318 /* First, create the snapshot in CurrentSnapshotData */
321 else
323 /* Make a saved copy */
326 /* Mark it as "registered" in FirstXactSnapshot */
329 }
330 else
332
333 FirstSnapshotSet = true;
334 return CurrentSnapshot;
335 }
336
338 return CurrentSnapshot;
339
340 /* Don't allow catalog snapshot to be older than xact snapshot. */
342
344
345 return CurrentSnapshot;
346}
347
348/*
349 * GetLatestSnapshot
350 * Get a snapshot that is up-to-date as of the current instant,
351 * even if we are executing in transaction-snapshot mode.
352 */
355{
356 /*
357 * We might be able to relax this, but nothing that could otherwise work
358 * needs it.
359 */
360 if (IsInParallelMode())
361 elog(ERROR,
362 "cannot update SecondarySnapshot during a parallel operation");
363
364 /*
365 * So far there are no cases requiring support for GetLatestSnapshot()
366 * during logical decoding, but it wouldn't be hard to add if required.
367 */
369
370 /* If first call in transaction, go ahead and set the xact snapshot */
371 if (!FirstSnapshotSet)
372 return GetTransactionSnapshot();
373
375
376 return SecondarySnapshot;
377}
378
379/*
380 * GetCatalogSnapshot
381 * Get a snapshot that is sufficiently up-to-date for scan of the
382 * system catalog with the specified OID.
383 */
386{
387 /*
388 * Return historic snapshot while we're doing logical decoding, so we can
389 * see the appropriate state of the catalog.
390 *
391 * This is the primary reason for needing to reset the system caches after
392 * finishing decoding.
393 */
395 return HistoricSnapshot;
396
397 return GetNonHistoricCatalogSnapshot(relid);
398}
399
400/*
401 * GetNonHistoricCatalogSnapshot
402 * Get a snapshot that is sufficiently up-to-date for scan of the system
403 * catalog with the specified OID, even while historic snapshots are set
404 * up.
405 */
408{
409 /*
410 * If the caller is trying to scan a relation that has no syscache, no
411 * catcache invalidations will be sent when it is updated. For a few key
412 * relations, snapshot invalidations are sent instead. If we're trying to
413 * scan a relation for which neither catcache nor snapshot invalidations
414 * are sent, we must refresh the snapshot every time.
415 */
416 if (CatalogSnapshot &&
418 !RelationHasSysCache(relid))
420
421 if (CatalogSnapshot == NULL)
422 {
423 /* Get new snapshot. */
425
426 /*
427 * Make sure the catalog snapshot will be accounted for in decisions
428 * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
429 * that would result in making a physical copy, which is overkill; and
430 * it would also create a dependency on some resource owner, which we
431 * do not want for reasons explained at the head of this file. Instead
432 * just shove the CatalogSnapshot into the pairing heap manually. This
433 * has to be reversed in InvalidateCatalogSnapshot, of course.
434 *
435 * NB: it had better be impossible for this to throw error, since the
436 * CatalogSnapshot pointer is already valid.
437 */
439 }
440
441 return CatalogSnapshot;
442}
443
444/*
445 * InvalidateCatalogSnapshot
446 * Mark the current catalog snapshot, if any, as invalid
447 *
448 * We could change this API to allow the caller to provide more fine-grained
449 * invalidation details, so that a change to relation A wouldn't prevent us
450 * from using our cached snapshot to scan relation B, but so far there's no
451 * evidence that the CPU cycles we spent tracking such fine details would be
452 * well-spent.
453 */
454void
456{
457 if (CatalogSnapshot)
458 {
462 INJECTION_POINT("invalidate-catalog-snapshot-end", NULL);
463 }
464}
465
466/*
467 * InvalidateCatalogSnapshotConditionally
468 * Drop catalog snapshot if it's the only one we have
469 *
470 * This is called when we are about to wait for client input, so we don't
471 * want to continue holding the catalog snapshot if it might mean that the
472 * global xmin horizon can't advance. However, if there are other snapshots
473 * still active or registered, the catalog snapshot isn't likely to be the
474 * oldest one, so we might as well keep it.
475 */
476void
484
485/*
486 * SnapshotSetCommandId
487 * Propagate CommandCounterIncrement into the static snapshots, if set
488 */
489void
491{
492 if (!FirstSnapshotSet)
493 return;
494
495 if (CurrentSnapshot)
496 CurrentSnapshot->curcid = curcid;
498 SecondarySnapshot->curcid = curcid;
499 /* Should we do the same with CatalogSnapshot? */
500}
501
502/*
503 * SetTransactionSnapshot
504 * Set the transaction's snapshot from an imported MVCC snapshot.
505 *
506 * Note that this is very closely tied to GetTransactionSnapshot --- it
507 * must take care of all the same considerations as the first-snapshot case
508 * in GetTransactionSnapshot.
509 */
510static void
513{
514 /* Caller should have checked this already */
516
517 /* Better do this to ensure following Assert succeeds. */
519
523
524 /*
525 * Even though we are not going to use the snapshot it computes, we must
526 * call GetSnapshotData, for two reasons: (1) to be sure that
527 * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
528 * the state for GlobalVis*.
529 */
531
532 /*
533 * Now copy appropriate fields from the source snapshot.
534 */
539 if (sourcesnap->xcnt > 0)
541 sourcesnap->xcnt * sizeof(TransactionId));
544 if (sourcesnap->subxcnt > 0)
546 sourcesnap->subxcnt * sizeof(TransactionId));
547 CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
548 CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
549 /* NB: curcid should NOT be copied, it's a local matter */
550
552
553 /*
554 * Now we have to fix what GetSnapshotData did with MyProc->xmin and
555 * TransactionXmin. There is a race condition: to make sure we are not
556 * causing the global xmin to go backwards, we have to test that the
557 * source transaction is still running, and that has to be done
558 * atomically. So let procarray.c do it.
559 *
560 * Note: in serializable mode, predicate.c will do this a second time. It
561 * doesn't seem worth contorting the logic here to avoid two calls,
562 * especially since it's not clear that predicate.c *must* do this.
563 */
564 if (sourceproc != NULL)
565 {
569 errmsg("could not import the requested snapshot"),
570 errdetail("The source transaction is not running anymore.")));
571 }
575 errmsg("could not import the requested snapshot"),
576 errdetail("The source process with PID %d is not running anymore.",
577 sourcepid)));
578
579 /*
580 * In transaction-snapshot mode, the first snapshot must live until end of
581 * xact, so we must make a copy of it. Furthermore, if we're running in
582 * serializable mode, predicate.c needs to do its own processing.
583 */
585 {
588 sourcepid);
589 /* Make a saved copy */
592 /* Mark it as "registered" in FirstXactSnapshot */
595 }
596
597 FirstSnapshotSet = true;
598}
599
600/*
601 * CopySnapshot
602 * Copy the given snapshot.
603 *
604 * The copy is palloc'd in TopTransactionContext and has initial refcounts set
605 * to 0. The returned snapshot has the copied flag set.
606 */
607static Snapshot
609{
612 Size size;
613
614 Assert(snapshot != InvalidSnapshot);
615
616 /* We allocate any XID arrays needed in the same palloc block. */
617 size = subxipoff = sizeof(SnapshotData) +
618 snapshot->xcnt * sizeof(TransactionId);
619 if (snapshot->subxcnt > 0)
620 size += snapshot->subxcnt * sizeof(TransactionId);
621
623 memcpy(newsnap, snapshot, sizeof(SnapshotData));
624
625 newsnap->regd_count = 0;
626 newsnap->active_count = 0;
627 newsnap->copied = true;
628 newsnap->snapXactCompletionCount = 0;
629
630 /* setup XID array */
631 if (snapshot->xcnt > 0)
632 {
633 newsnap->xip = (TransactionId *) (newsnap + 1);
634 memcpy(newsnap->xip, snapshot->xip,
635 snapshot->xcnt * sizeof(TransactionId));
636 }
637 else
638 newsnap->xip = NULL;
639
640 /*
641 * Setup subXID array. Don't bother to copy it if it had overflowed,
642 * though, because it's not used anywhere in that case. Except if it's a
643 * snapshot taken during recovery; all the top-level XIDs are in subxip as
644 * well in that case, so we mustn't lose them.
645 */
646 if (snapshot->subxcnt > 0 &&
647 (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
648 {
649 newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
650 memcpy(newsnap->subxip, snapshot->subxip,
651 snapshot->subxcnt * sizeof(TransactionId));
652 }
653 else
654 newsnap->subxip = NULL;
655
656 return newsnap;
657}
658
659/*
660 * FreeSnapshot
661 * Free the memory associated with a snapshot.
 *
 * The snapshot must be one with the copied flag set and with no remaining
 * registered or active references; these preconditions are verified only
 * by assertions. CopySnapshot allocates the XID arrays in the same block
 * as the struct, so for such copies the single pfree below releases the
 * arrays as well.
662 */
663static void
665{
666 Assert(snapshot->regd_count == 0);
667 Assert(snapshot->active_count == 0);
668 Assert(snapshot->copied);
669
670 pfree(snapshot);
671}
672
673/*
674 * PushActiveSnapshot
675 * Set the given snapshot as the current active snapshot
676 *
677 * If the passed snapshot is a statically-allocated one, or it is possibly
678 * subject to a future command counter update, create a new long-lived copy
679 * with active refcount=1. Otherwise, only increment the refcount.
680 */
681void
686
687/*
688 * PushActiveSnapshotWithLevel
689 * Set the given snapshot as the current active snapshot
690 *
691 * Same as PushActiveSnapshot except that caller can specify the
692 * transaction nesting level that "owns" the snapshot. This level
693 * must not be deeper than the current top of the snapshot stack.
694 */
695void
697{
699
700 Assert(snapshot != InvalidSnapshot);
702
704
705 /*
706 * Checking SecondarySnapshot is probably useless here, but it seems
707 * better to be sure.
708 */
709 if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
710 !snapshot->copied)
711 newactive->as_snap = CopySnapshot(snapshot);
712 else
713 newactive->as_snap = snapshot;
714
715 newactive->as_next = ActiveSnapshot;
717
718 newactive->as_snap->active_count++;
719
721}
722
723/*
724 * PushCopiedSnapshot
725 * As above, except forcibly copy the presented snapshot.
726 *
727 * This should be used when the ActiveSnapshot has to be modifiable, for
728 * example if the caller intends to call UpdateActiveSnapshotCommandId.
729 * The new snapshot will be released when popped from the stack.
730 */
731void
733{
735}
736
737/*
738 * UpdateActiveSnapshotCommandId
739 *
740 * Update the current CID of the active snapshot. This can only be applied
741 * to a snapshot that is not referenced elsewhere.
742 */
743void
745{
747 curcid;
748
752
753 /*
754 * Don't allow modification of the active snapshot during parallel
755 * operation. We share the snapshot to worker backends at the beginning
756 * of parallel operation, so any change to the snapshot can lead to
757 * inconsistencies. We have other defenses against
758 * CommandCounterIncrement, but there are a few places that call this
759 * directly, so we put an additional guard here.
760 */
762 curcid = GetCurrentCommandId(false);
763 if (IsInParallelMode() && save_curcid != curcid)
764 elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
765 ActiveSnapshot->as_snap->curcid = curcid;
766}
767
768/*
769 * PopActiveSnapshot
770 *
771 * Remove the topmost snapshot from the active snapshot stack, decrementing the
772 * reference count, and free it if this was the last reference.
773 */
774void
794
795/*
796 * GetActiveSnapshot
797 * Return the topmost snapshot in the Active stack.
798 */
801{
803
804 return ActiveSnapshot->as_snap;
805}
806
807/*
808 * ActiveSnapshotSet
809 * Return whether there is at least one snapshot in the Active stack
810 */
811bool
813{
814 return ActiveSnapshot != NULL; /* ActiveSnapshot is the top-of-stack pointer */
815}
816
817/*
818 * RegisterSnapshot
819 * Register a snapshot as being in use by the current resource owner
820 *
821 * If InvalidSnapshot is passed, it is not registered.
822 */
825{
826 if (snapshot == InvalidSnapshot)
827 return InvalidSnapshot;
828
830}
831
832/*
833 * RegisterSnapshotOnOwner
834 * As above, but use the specified resource owner
835 */
838{
840
841 if (snapshot == InvalidSnapshot)
842 return InvalidSnapshot;
843
844 /* Static snapshot? Create a persistent copy */
845 snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
846
847 /* and tell resowner.c about it */
849 snap->regd_count++;
851
852 if (snap->regd_count == 1)
854
855 return snap;
856}
857
858/*
859 * UnregisterSnapshot
860 *
861 * Decrement the reference count of a snapshot, remove the corresponding
862 * reference from CurrentResourceOwner, and free the snapshot if no more
863 * references remain.
864 */
865void
867{
868 if (snapshot == NULL)
869 return;
870
872}
873
874/*
875 * UnregisterSnapshotFromOwner
876 * As above, but use the specified resource owner
877 */
878void
880{
881 if (snapshot == NULL)
882 return;
883
884 ResourceOwnerForgetSnapshot(owner, snapshot);
886}
887
888static void
890{
891 Assert(snapshot->regd_count > 0);
893
894 snapshot->regd_count--;
895 if (snapshot->regd_count == 0)
897
898 if (snapshot->regd_count == 0 && snapshot->active_count == 0)
899 {
900 FreeSnapshot(snapshot);
902 }
903}
904
905/*
906 * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
907 * by xmin, so that the snapshot with smallest xmin is at the top.
908 */
909static int
911{
914
915 if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
916 return 1;
917 else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
918 return -1;
919 else
920 return 0;
921}
922
923/*
924 * SnapshotResetXmin
925 *
926 * If there are no more snapshots, we can reset our PGPROC->xmin to
927 * InvalidTransactionId. Note we can do this without locking because we assume
928 * that storing an Xid is atomic.
929 *
930 * Even if there are some remaining snapshots, we may be able to advance our
931 * PGPROC->xmin to some degree. This typically happens when a portal is
932 * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
933 * the active snapshot stack is empty; this allows us not to need to track
934 * which active snapshot is oldest.
935 */
936static void
956
957/*
958 * AtSubCommit_Snapshot
 *
 * Walk the active snapshot stack from the top and reassign every element
 * whose as_level is at least "level" to level - 1. No snapshot is popped,
 * freed, or has its reference count changed here; only the ownership
 * level recorded in the stack elements is modified.
959 */
960void
962{
963 ActiveSnapshotElt *active;
964
965 /*
966 * Relabel the active snapshots set in this subtransaction as though they
967 * are owned by the parent subxact.
968 */
969 for (active = ActiveSnapshot; active != NULL; active = active->as_next)
970 {
971 if (active->as_level < level)
972 break; /* stack is in non-increasing as_level order, so the rest are outer levels */
973 active->as_level = level - 1;
974 }
975}
976
977/*
978 * AtSubAbort_Snapshot
979 * Clean up snapshots after a subtransaction abort
980 */
981void
983{
984 /* Forget the active snapshots set by this subtransaction */
985 while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
986 {
988
990
991 /*
992 * Decrement the snapshot's active count. If it's still registered or
993 * marked as active by an outer subtransaction, we can't free it yet.
994 */
997
1001
1002 /* and free the stack element */
1004
1006 }
1007
1009}
1010
1011/*
1012 * AtEOXact_Snapshot
1013 * Snapshot manager's cleanup function for end of transaction
1014 */
1015void
1017{
1018 /*
1019 * In transaction-snapshot mode we must release our privately-managed
1020 * reference to the transaction snapshot. We must remove it from
1021 * RegisteredSnapshots to keep the check below happy. But we don't bother
1022 * to do FreeSnapshot, for two reasons: the memory will go away with
1023 * TopTransactionContext anyway, and if someone has left the snapshot
1024 * stacked as active, we don't want the code below to be chasing through a
1025 * dangling pointer.
1026 */
1027 if (FirstXactSnapshot != NULL)
1028 {
1032 }
1034
1035 /*
1036 * If we exported any snapshots, clean them up.
1037 */
1038 if (exportedSnapshots != NIL)
1039 {
1040 ListCell *lc;
1041
1042 /*
1043 * Get rid of the files. Unlink failure is only a WARNING because (1)
1044 * it's too late to abort the transaction, and (2) leaving a leaked
1045 * file around has little real consequence anyway.
1046 *
1047 * We also need to remove the snapshots from RegisteredSnapshots to
1048 * prevent a warning below.
1049 *
1050 * As with the FirstXactSnapshot, we don't need to free resources of
1051 * the snapshot itself as it will go away with the memory context.
1052 */
1053 foreach(lc, exportedSnapshots)
1054 {
1056
1057 if (unlink(esnap->snapfile))
1058 elog(WARNING, "could not unlink file \"%s\": %m",
1059 esnap->snapfile);
1060
1062 &esnap->snapshot->ph_node);
1063 }
1064
1066 }
1067
1068 /* Drop catalog snapshot if any */
1070
1071 /* On commit, complain about leftover snapshots */
1072 if (isCommit)
1073 {
1074 ActiveSnapshotElt *active;
1075
1077 elog(WARNING, "registered snapshots seem to remain after cleanup");
1078
1079 /* complain about unpopped active snapshots */
1080 for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1081 elog(WARNING, "snapshot %p still active", active);
1082 }
1083
1084 /*
1085 * And reset our state. We don't need to free the memory explicitly --
1086 * it'll go away with TopTransactionContext.
1087 */
1090
1093
1094 FirstSnapshotSet = false;
1095
1096 /*
1097 * During normal commit processing, we call ProcArrayEndTransaction() to
1098 * reset the MyProc->xmin. That call happens prior to the call to
1099 * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1100 */
1101 if (resetXmin)
1103
1104 Assert(resetXmin || MyProc->xmin == 0);
1105}
1106
1107
1108/*
1109 * ExportSnapshot
1110 * Export the snapshot to a file so that other backends can import it.
1111 * Returns the token (the file name) that can be used to import this
1112 * snapshot.
1113 */
1114char *
1116{
1117 TransactionId topXid;
1118 TransactionId *children;
1120 int nchildren;
1121 int addTopXid;
1123 FILE *f;
1124 int i;
1126 char path[MAXPGPATH];
1127 char pathtmp[MAXPGPATH];
1128
1129 /*
1130 * It's tempting to call RequireTransactionBlock here, since it's not very
1131 * useful to export a snapshot that will disappear immediately afterwards.
1132 * However, we haven't got enough information to do that, since we don't
1133 * know if we're at top level or not. For example, we could be inside a
1134 * plpgsql function that is going to fire off other transactions via
1135 * dblink. Rather than disallow perfectly legitimate usages, don't make a
1136 * check.
1137 *
1138 * Also note that we don't make any restriction on the transaction's
1139 * isolation level; however, importers must check the level if they are
1140 * serializable.
1141 */
1142
1143 /*
1144 * Get our transaction ID if there is one, to include in the snapshot.
1145 */
1146 topXid = GetTopTransactionIdIfAny();
1147
1148 /*
1149 * We cannot export a snapshot from a subtransaction because there's no
1150 * easy way for importers to verify that the same subtransaction is still
1151 * running.
1152 */
1153 if (IsSubTransaction())
1154 ereport(ERROR,
1156 errmsg("cannot export a snapshot from a subtransaction")));
1157
1158 /*
1159 * We do, however, allow previously committed subtransactions to exist.
1160 * Importers of the snapshot must see them as still running, so get their
1161 * XIDs to add them to the snapshot.
1162 */
1164
1165 /*
1166 * Generate file path for the snapshot. We start numbering of snapshots
1167 * inside the transaction from 1.
1168 */
1169 snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1172
1173 /*
1174 * Copy the snapshot into TopTransactionContext, add it to the
1175 * exportedSnapshots list, and mark it pseudo-registered. We do this to
1176 * ensure that the snapshot's xmin is honored for the rest of the
1177 * transaction.
1178 */
1179 snapshot = CopySnapshot(snapshot);
1180
1183 esnap->snapfile = pstrdup(path);
1184 esnap->snapshot = snapshot;
1187
1188 snapshot->regd_count++;
1190
1191 /*
1192 * Fill buf with a text serialization of the snapshot, plus identification
1193 * data about this transaction. The format expected by ImportSnapshot is
1194 * pretty rigid: each line must be fieldname:value.
1195 */
1197
1198 appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1199 appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1200 appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1201 appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1202 appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1203
1204 appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1205 appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1206
1207 /*
1208 * We must include our own top transaction ID in the top-xid data, since
1209 * by definition we will still be running when the importing transaction
1210 * adopts the snapshot, but GetSnapshotData never includes our own XID in
1211 * the snapshot. (There must, therefore, be enough room to add it.)
1212 *
1213 * However, it could be that our topXid is after the xmax, in which case
1214 * we shouldn't include it because xip[] members are expected to be before
1215 * xmax. (We need not make the same check for subxip[] members, see
1216 * snapshot.h.)
1217 */
1218 addTopXid = (TransactionIdIsValid(topXid) &&
1219 TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1220 appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1221 for (i = 0; i < snapshot->xcnt; i++)
1222 appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1223 if (addTopXid)
1224 appendStringInfo(&buf, "xip:%u\n", topXid);
1225
1226 /*
1227 * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1228 * we have to cope with possible overflow.
1229 */
1230 if (snapshot->suboverflowed ||
1232 appendStringInfoString(&buf, "sof:1\n");
1233 else
1234 {
1235 appendStringInfoString(&buf, "sof:0\n");
1236 appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1237 for (i = 0; i < snapshot->subxcnt; i++)
1238 appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1239 for (i = 0; i < nchildren; i++)
1240 appendStringInfo(&buf, "sxp:%u\n", children[i]);
1241 }
1242 appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1243
1244 /*
1245 * Now write the text representation into a file. We first write to a
1246 * ".tmp" filename, and rename to final filename if no error. This
1247 * ensures that no other backend can read an incomplete file
1248 * (ImportSnapshot won't allow it because of its valid-characters check).
1249 */
1250 snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1251 if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1252 ereport(ERROR,
1254 errmsg("could not create file \"%s\": %m", pathtmp)));
1255
1256 if (fwrite(buf.data, buf.len, 1, f) != 1)
1257 ereport(ERROR,
1259 errmsg("could not write to file \"%s\": %m", pathtmp)));
1260
1261 /* no fsync() since file need not survive a system crash */
1262
1263 if (FreeFile(f))
1264 ereport(ERROR,
1266 errmsg("could not write to file \"%s\": %m", pathtmp)));
1267
1268 /*
1269 * Now that we have written everything into a .tmp file, rename the file
1270 * to remove the .tmp suffix.
1271 */
1272 if (rename(pathtmp, path) < 0)
1273 ereport(ERROR,
1275 errmsg("could not rename file \"%s\" to \"%s\": %m",
1276 pathtmp, path)));
1277
1278 /*
1279 * The basename of the file is what we return from pg_export_snapshot().
1280 * It's already in path in a textual format and we know that the path
1281 * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1282 * and pstrdup it so as not to return the address of a local variable.
1283 */
1284 return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1285}
1286
1287/*
1288 * pg_export_snapshot
1289 * SQL-callable wrapper for ExportSnapshot.
1290 */
1291Datum
1299
1300
1301/*
1302 * Parsing subroutines for ImportSnapshot: parse a line with the given
1303 * prefix followed by a value, and advance *s to the next line. The
1304 * filename is provided for use in error messages.
1305 */
1306static int
1307parseIntFromText(const char *prefix, char **s, const char *filename)
1308{
1309 char *ptr = *s;
1310 int prefixlen = strlen(prefix);
1311 int val;
1312
1313 if (strncmp(ptr, prefix, prefixlen) != 0)
1314 ereport(ERROR,
1316 errmsg("invalid snapshot data in file \"%s\"", filename)));
1317 ptr += prefixlen;
1318 if (sscanf(ptr, "%d", &val) != 1)
1319 ereport(ERROR,
1321 errmsg("invalid snapshot data in file \"%s\"", filename)));
1322 ptr = strchr(ptr, '\n');
1323 if (!ptr)
1324 ereport(ERROR,
1326 errmsg("invalid snapshot data in file \"%s\"", filename)));
1327 *s = ptr + 1;
1328 return val;
1329}
1330
1331static TransactionId
1332parseXidFromText(const char *prefix, char **s, const char *filename)
1333{
1334 char *ptr = *s;
1335 int prefixlen = strlen(prefix);
1337
1338 if (strncmp(ptr, prefix, prefixlen) != 0)
1339 ereport(ERROR,
1341 errmsg("invalid snapshot data in file \"%s\"", filename)));
1342 ptr += prefixlen;
1343 if (sscanf(ptr, "%u", &val) != 1)
1344 ereport(ERROR,
1346 errmsg("invalid snapshot data in file \"%s\"", filename)));
1347 ptr = strchr(ptr, '\n');
1348 if (!ptr)
1349 ereport(ERROR,
1351 errmsg("invalid snapshot data in file \"%s\"", filename)));
1352 *s = ptr + 1;
1353 return val;
1354}
1355
1356static void
1357parseVxidFromText(const char *prefix, char **s, const char *filename,
1359{
1360 char *ptr = *s;
1361 int prefixlen = strlen(prefix);
1362
1363 if (strncmp(ptr, prefix, prefixlen) != 0)
1364 ereport(ERROR,
1366 errmsg("invalid snapshot data in file \"%s\"", filename)));
1367 ptr += prefixlen;
1368 if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1369 ereport(ERROR,
1371 errmsg("invalid snapshot data in file \"%s\"", filename)));
1372 ptr = strchr(ptr, '\n');
1373 if (!ptr)
1374 ereport(ERROR,
1376 errmsg("invalid snapshot data in file \"%s\"", filename)));
1377 *s = ptr + 1;
1378}
1379
1380/*
1381 * ImportSnapshot
1382 * Import a previously exported snapshot. The argument should be a
1383 * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1384 * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1385 */
1386void
1388{
1389 char path[MAXPGPATH];
1390 FILE *f;
1391 struct stat stat_buf;
1392 char *filebuf;
1393 int xcnt;
1394 int i;
1396 int src_pid;
1397 Oid src_dbid;
1398 int src_isolevel;
1399 bool src_readonly;
1400 SnapshotData snapshot;
1401
1402 /*
1403 * Must be at top level of a fresh transaction. Note in particular that
1404 * we check we haven't acquired an XID --- if we have, it's conceivable
1405 * that the snapshot would show it as not running, making for very screwy
1406 * behavior.
1407 */
1408 if (FirstSnapshotSet ||
1411 ereport(ERROR,
1413 errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1414
1415 /*
1416 * If we are in read committed mode then the next query would execute with
1417 * a new snapshot thus making this function call quite useless.
1418 */
1420 ereport(ERROR,
1422 errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1423
1424 /*
1425 * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1426 * this mainly to prevent reading arbitrary files.
1427 */
1428 if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1429 ereport(ERROR,
1431 errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1432
1433 /* OK, read the file */
1435
1436 f = AllocateFile(path, PG_BINARY_R);
1437 if (!f)
1438 {
1439 /*
1440 * If file is missing while identifier has a correct format, avoid
1441 * system errors.
1442 */
1443 if (errno == ENOENT)
1444 ereport(ERROR,
1446 errmsg("snapshot \"%s\" does not exist", idstr)));
1447 else
1448 ereport(ERROR,
1450 errmsg("could not open file \"%s\" for reading: %m",
1451 path)));
1452 }
1453
1454 /* get the size of the file so that we know how much memory we need */
1455 if (fstat(fileno(f), &stat_buf))
1456 elog(ERROR, "could not stat file \"%s\": %m", path);
1457
1458 /* and read the file into a palloc'd string */
1459 filebuf = (char *) palloc(stat_buf.st_size + 1);
1460 if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1461 elog(ERROR, "could not read file \"%s\": %m", path);
1462
1463 filebuf[stat_buf.st_size] = '\0';
1464
1465 FreeFile(f);
1466
1467 /*
1468 * Construct a snapshot struct by parsing the file content.
1469 */
1470 memset(&snapshot, 0, sizeof(snapshot));
1471
1472 parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1473 src_pid = parseIntFromText("pid:", &filebuf, path);
1474 /* we abuse parseXidFromText a bit here ... */
1475 src_dbid = parseXidFromText("dbid:", &filebuf, path);
1476 src_isolevel = parseIntFromText("iso:", &filebuf, path);
1477 src_readonly = parseIntFromText("ro:", &filebuf, path);
1478
1479 snapshot.snapshot_type = SNAPSHOT_MVCC;
1480
1481 snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1482 snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1483
1484 snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1485
1486 /* sanity-check the xid count before palloc */
1488 ereport(ERROR,
1490 errmsg("invalid snapshot data in file \"%s\"", path)));
1491
1492 snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1493 for (i = 0; i < xcnt; i++)
1494 snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1495
1496 snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1497
1498 if (!snapshot.suboverflowed)
1499 {
1500 snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1501
1502 /* sanity-check the xid count before palloc */
1504 ereport(ERROR,
1506 errmsg("invalid snapshot data in file \"%s\"", path)));
1507
1508 snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1509 for (i = 0; i < xcnt; i++)
1510 snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1511 }
1512 else
1513 {
1514 snapshot.subxcnt = 0;
1515 snapshot.subxip = NULL;
1516 }
1517
1518 snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1519
1520 /*
1521 * Do some additional sanity checking, just to protect ourselves. We
1522 * don't trouble to check the array elements, just the most critical
1523 * fields.
1524 */
1526 !OidIsValid(src_dbid) ||
1527 !TransactionIdIsNormal(snapshot.xmin) ||
1528 !TransactionIdIsNormal(snapshot.xmax))
1529 ereport(ERROR,
1531 errmsg("invalid snapshot data in file \"%s\"", path)));
1532
1533 /*
1534 * If we're serializable, the source transaction must be too, otherwise
1535 * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1536 * non-read-only transaction can't adopt a snapshot from a read-only
1537 * transaction, as predicate.c handles the cases very differently.
1538 */
1540 {
1542 ereport(ERROR,
1544 errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1545 if (src_readonly && !XactReadOnly)
1546 ereport(ERROR,
1548 errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1549 }
1550
1551 /*
1552 * We cannot import a snapshot that was taken in a different database,
1553 * because vacuum calculates OldestXmin on a per-database basis; so the
1554 * source transaction's xmin doesn't protect us from data loss. This
1555 * restriction could be removed if the source transaction were to mark its
1556 * xmin as being globally applicable. But that would require some
1557 * additional syntax, since that has to be known when the snapshot is
1558 * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1559 */
1560 if (src_dbid != MyDatabaseId)
1561 ereport(ERROR,
1563 errmsg("cannot import a snapshot from a different database")));
1564
1565 /* OK, install the snapshot */
1567}
1568
1569/*
1570 * XactHasExportedSnapshots
1571 * Test whether current transaction has exported any snapshots.
1572 */
1573bool
1575{
1576 return (exportedSnapshots != NIL);
1577}
1578
1579/*
1580 * DeleteAllExportedSnapshotFiles
1581 * Clean up any files that have been left behind by a crashed backend
1582 * that had exported snapshots before it died.
1583 *
1584 * This should be called during database startup or crash recovery.
1585 */
1586void
1588{
1589 char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1590 DIR *s_dir;
1591 struct dirent *s_de;
1592
1593 /*
1594 * Problems in reading the directory, or unlinking files, are reported at
1595 * LOG level. Since we're running in the startup process, ERROR level
1596 * would prevent database start, and it's not important enough for that.
1597 */
1599
1601 {
1602 if (strcmp(s_de->d_name, ".") == 0 ||
1603 strcmp(s_de->d_name, "..") == 0)
1604 continue;
1605
1606 snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1607
1608 if (unlink(buf) != 0)
1609 ereport(LOG,
1611 errmsg("could not remove file \"%s\": %m", buf)));
1612 }
1613
1614 FreeDir(s_dir);
1615}
1616
1617/*
1618 * ThereAreNoPriorRegisteredSnapshots
1619 * Is the registered snapshot count less than or equal to one?
1620 *
1621 * Don't use this to settle important decisions. While zero registrations and
1622 * no ActiveSnapshot would confirm a certain idleness, the system makes no
1623 * guarantees about the significance of one registered snapshot.
1624 */
1625bool
1627{
1630 return true;
1631
1632 return false;
1633}
1634
1635/*
1636 * HaveRegisteredOrActiveSnapshot
1637 * Is there any registered or active snapshot?
1638 *
1639 * NB: Unless pushed or active, the cached catalog snapshot will not cause
1640 * this function to return true. That allows this function to be used in
1641 * checks enforcing a longer-lived snapshot.
1642 */
1643bool
1645{
1646 if (ActiveSnapshot != NULL)
1647 return true;
1648
1649 /*
1650 * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1651 * removed at any time due to invalidation processing. If explicitly
1652 * registered more than one snapshot has to be in RegisteredSnapshots.
1653 */
1654 if (CatalogSnapshot != NULL &&
1656 return false;
1657
1659}
1660
1661
1662/*
1663 * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1664 * access to behave just like it did at a certain point in the past.
1665 *
1666 * Needed for logical decoding.
1667 */
1668void
1670{
1672
1673 /* setup the timetravel snapshot */
1675
1676 /* setup (cmin, cmax) lookup hash */
1677 tuplecid_data = tuplecids;
1678}
1679
1680
1681/*
1682 * Make catalog snapshots behave normally again.
1683 */
1684void
1690
1691bool
1693{
1694 return HistoricSnapshot != NULL;
1695}
1696
1697HTAB *
1703
1704/*
1705 * EstimateSnapshotSpace
1706 * Returns the size needed to store the given snapshot.
1707 *
1708 * We are exporting only required fields from the Snapshot, stored in
1709 * SerializedSnapshotData.
1710 */
1711Size
1713{
1714 Size size;
1715
1716 Assert(snapshot != InvalidSnapshot);
1717 Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1718
1719 /* We allocate any XID arrays needed in the same palloc block. */
1720 size = add_size(sizeof(SerializedSnapshotData),
1721 mul_size(snapshot->xcnt, sizeof(TransactionId)));
1722 if (snapshot->subxcnt > 0 &&
1723 (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1724 size = add_size(size,
1725 mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1726
1727 return size;
1728}
1729
1730/*
1731 * SerializeSnapshot
1732 * Dumps the serialized snapshot (extracted from given snapshot) onto the
1733 * memory location at start_address.
1734 */
1735void
1737{
1739
1740 Assert(snapshot->subxcnt >= 0);
1741
1742 /* Copy all required fields */
1743 serialized_snapshot.xmin = snapshot->xmin;
1744 serialized_snapshot.xmax = snapshot->xmax;
1745 serialized_snapshot.xcnt = snapshot->xcnt;
1746 serialized_snapshot.subxcnt = snapshot->subxcnt;
1747 serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1748 serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1749 serialized_snapshot.curcid = snapshot->curcid;
1750
1751 /*
1752 * Ignore the SubXID array if it has overflowed, unless the snapshot was
1753 * taken during recovery - in that case, top-level XIDs are in subxip as
1754 * well, and we mustn't lose them.
1755 */
1756 if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1757 serialized_snapshot.subxcnt = 0;
1758
1759 /* Copy struct to possibly-unaligned buffer */
1762
1763 /* Copy XID array */
1764 if (snapshot->xcnt > 0)
1766 sizeof(SerializedSnapshotData)),
1767 snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1768
1769 /*
1770 * Copy SubXID array. Don't bother to copy it if it had overflowed,
1771 * though, because it's not used anywhere in that case. Except if it's a
1772 * snapshot taken during recovery; all the top-level XIDs are in subxip as
1773 * well in that case, so we mustn't lose them.
1774 */
1775 if (serialized_snapshot.subxcnt > 0)
1776 {
1778 snapshot->xcnt * sizeof(TransactionId);
1779
1781 snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1782 }
1783}
1784
1785/*
1786 * RestoreSnapshot
1787 * Restore a serialized snapshot from the specified address.
1788 *
1789 * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1790 * to 0. The returned snapshot has the copied flag set.
1791 */
1794{
1796 Size size;
1797 Snapshot snapshot;
1799
1801 sizeof(SerializedSnapshotData));
1804
1805 /* We allocate any XID arrays needed in the same palloc block. */
1806 size = sizeof(SnapshotData)
1807 + serialized_snapshot.xcnt * sizeof(TransactionId)
1808 + serialized_snapshot.subxcnt * sizeof(TransactionId);
1809
1810 /* Copy all required fields */
1812 snapshot->snapshot_type = SNAPSHOT_MVCC;
1813 snapshot->xmin = serialized_snapshot.xmin;
1814 snapshot->xmax = serialized_snapshot.xmax;
1815 snapshot->xip = NULL;
1816 snapshot->xcnt = serialized_snapshot.xcnt;
1817 snapshot->subxip = NULL;
1818 snapshot->subxcnt = serialized_snapshot.subxcnt;
1819 snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1820 snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1821 snapshot->curcid = serialized_snapshot.curcid;
1822 snapshot->snapXactCompletionCount = 0;
1823
1824 /* Copy XIDs, if present. */
1825 if (serialized_snapshot.xcnt > 0)
1826 {
1827 snapshot->xip = (TransactionId *) (snapshot + 1);
1828 memcpy(snapshot->xip, serialized_xids,
1829 serialized_snapshot.xcnt * sizeof(TransactionId));
1830 }
1831
1832 /* Copy SubXIDs, if present. */
1833 if (serialized_snapshot.subxcnt > 0)
1834 {
1835 snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1838 serialized_snapshot.subxcnt * sizeof(TransactionId));
1839 }
1840
1841 /* Set the copied flag so that the caller will set refcounts correctly. */
1842 snapshot->regd_count = 0;
1843 snapshot->active_count = 0;
1844 snapshot->copied = true;
1845
1846 return snapshot;
1847}
1848
1849/*
1850 * Install a restored snapshot as the transaction snapshot.
1851 */
1852void
1857
1858/*
1859 * XidInMVCCSnapshot
1860 * Is the given XID still-in-progress according to the snapshot?
1861 *
1862 * Note: GetSnapshotData never stores either top xid or subxids of our own
1863 * backend into a snapshot, so these xids will not be reported as "running"
1864 * by this function. This is OK for current uses, because we always check
1865 * TransactionIdIsCurrentTransactionId first, except when it's known the
1866 * XID could not be ours anyway.
1867 */
1868bool
1870{
1871 /*
1872 * Make a quick range check to eliminate most XIDs without looking at the
1873 * xip arrays. Note that this is OK even if we convert a subxact XID to
1874 * its parent below, because a subxact with XID < xmin has surely also got
1875 * a parent with XID < xmin, while one with XID >= xmax must belong to a
1876 * parent that was not yet committed at the time of this snapshot.
1877 */
1878
1879 /* Any xid < xmin is not in-progress */
1880 if (TransactionIdPrecedes(xid, snapshot->xmin))
1881 return false;
1882 /* Any xid >= xmax is in-progress */
1883 if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1884 return true;
1885
1886 /*
1887 * Snapshot information is stored slightly differently in snapshots taken
1888 * during recovery.
1889 */
1890 if (!snapshot->takenDuringRecovery)
1891 {
1892 /*
1893 * If the snapshot contains full subxact data, the fastest way to
1894 * check things is just to compare the given XID against both subxact
1895 * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1896 * use pg_subtrans to convert a subxact XID to its parent XID, but
1897 * then we need only look at top-level XIDs not subxacts.
1898 */
1899 if (!snapshot->suboverflowed)
1900 {
1901 /* we have full data, so search subxip */
1902 if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1903 return true;
1904
1905 /* not there, fall through to search xip[] */
1906 }
1907 else
1908 {
1909 /*
1910 * Snapshot overflowed, so convert xid to top-level. This is safe
1911 * because we eliminated too-old XIDs above.
1912 */
1914
1915 /*
1916 * If xid was indeed a subxact, we might now have an xid < xmin,
1917 * so recheck to avoid an array scan. No point in rechecking
1918 * xmax.
1919 */
1920 if (TransactionIdPrecedes(xid, snapshot->xmin))
1921 return false;
1922 }
1923
1924 if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1925 return true;
1926 }
1927 else
1928 {
1929 /*
1930 * In recovery we store all xids in the subxip array because it is by
1931 * far the bigger array, and we mostly don't know which xids are
1932 * top-level and which are subxacts. The xip array is empty.
1933 *
1934 * We start by searching subtrans, if we overflowed.
1935 */
1936 if (snapshot->suboverflowed)
1937 {
1938 /*
1939 * Snapshot overflowed, so convert xid to top-level. This is safe
1940 * because we eliminated too-old XIDs above.
1941 */
1943
1944 /*
1945 * If xid was indeed a subxact, we might now have an xid < xmin,
1946 * so recheck to avoid an array scan. No point in rechecking
1947 * xmax.
1948 */
1949 if (TransactionIdPrecedes(xid, snapshot->xmin))
1950 return false;
1951 }
1952
1953 /*
1954 * We now have either a top-level xid higher than xmin or an
1955 * indeterminate xid. We don't know whether it's top level or subxact
1956 * but it doesn't matter. If it's present, the xid is visible.
1957 */
1958 if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1959 return true;
1960 }
1961
1962 return false;
1963}
1964
1965/* ResourceOwner callbacks */
1966
1967static void
static int32 next
Definition blutils.c:225
#define PG_BINARY_R
Definition c.h:1283
#define Assert(condition)
Definition c.h:883
int32_t int32
Definition c.h:552
uint32_t uint32
Definition c.h:556
#define PG_BINARY_W
Definition c.h:1284
uint32 CommandId
Definition c.h:690
uint32 TransactionId
Definition c.h:676
#define OidIsValid(objectId)
Definition c.h:798
size_t Size
Definition c.h:629
int errcode_for_file_access(void)
Definition elog.c:886
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define LOG
Definition elog.h:31
#define WARNING
Definition elog.h:36
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
int FreeDir(DIR *dir)
Definition fd.c:3005
int FreeFile(FILE *file)
Definition fd.c:2823
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition fd.c:2968
DIR * AllocateDir(const char *dirname)
Definition fd.c:2887
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2624
#define palloc_object(type)
Definition fe_memutils.h:74
#define PG_RETURN_TEXT_P(x)
Definition fmgr.h:374
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
int MyProcPid
Definition globals.c:47
Oid MyDatabaseId
Definition globals.c:94
long val
Definition informix.c:689
#define INJECTION_POINT(name, arg)
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
#define VirtualTransactionIdIsValid(vxid)
Definition lock.h:69
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopTransactionContext
Definition mcxt.c:171
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
#define InvalidPid
Definition miscadmin.h:32
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
pairingheap_node * pairingheap_first(pairingheap *heap)
#define pairingheap_is_empty(h)
Definition pairingheap.h:99
#define pairingheap_is_singular(h)
#define pairingheap_container(type, membername, ptr)
Definition pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition pairingheap.h:51
#define pairingheap_reset(h)
Definition pairingheap.h:96
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void * arg
#define MAXPGPATH
static char * filename
Definition pg_dumpall.c:120
static bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
Definition pg_lfind.h:153
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define snprintf
Definition port.h:260
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
unsigned int Oid
void SetSerializableTransactionSnapshot(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition predicate.c:1720
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition predicate.c:1680
static int fb(int x)
int GetMaxSnapshotSubxidCount(void)
Definition procarray.c:2030
Snapshot GetSnapshotData(Snapshot snapshot)
Definition procarray.c:2125
int GetMaxSnapshotXidCount(void)
Definition procarray.c:2019
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition procarray.c:2566
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition procarray.c:2482
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void ResourceOwnerForget(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition resowner.c:561
void ResourceOwnerRemember(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition resowner.c:521
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition resowner.c:449
#define RELEASE_PRIO_SNAPSHOT_REFS
Definition resowner.h:75
@ RESOURCE_RELEASE_AFTER_LOCKS
Definition resowner.h:56
Size add_size(Size s1, Size s2)
Definition shmem.c:495
Size mul_size(Size s1, Size s2)
Definition shmem.c:510
static Snapshot HistoricSnapshot
Definition snapmgr.c:152
static Snapshot FirstXactSnapshot
Definition snapmgr.c:200
TransactionId RecentXmin
Definition snapmgr.c:160
static SnapshotData CatalogSnapshotData
Definition snapmgr.c:143
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition snapmgr.c:879
static void SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, int sourcepid, PGPROC *sourceproc)
Definition snapmgr.c:511
void AtSubAbort_Snapshot(int level)
Definition snapmgr.c:982
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition snapmgr.c:1736
SnapshotData SnapshotSelfData
Definition snapmgr.c:144
void AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Definition snapmgr.c:1016
static Snapshot CurrentSnapshot
Definition snapmgr.c:149
static Snapshot SecondarySnapshot
Definition snapmgr.c:150
bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
Definition snapmgr.c:1869
static List * exportedSnapshots
Definition snapmgr.c:213
static pairingheap RegisteredSnapshots
Definition snapmgr.c:190
bool FirstSnapshotSet
Definition snapmgr.c:193
static void UnregisterSnapshotNoOwner(Snapshot snapshot)
Definition snapmgr.c:889
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
Snapshot GetLatestSnapshot(void)
Definition snapmgr.c:354
void TeardownHistoricSnapshot(bool is_error)
Definition snapmgr.c:1685
Snapshot GetCatalogSnapshot(Oid relid)
Definition snapmgr.c:385
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
void PushActiveSnapshot(Snapshot snapshot)
Definition snapmgr.c:682
static Snapshot CopySnapshot(Snapshot snapshot)
Definition snapmgr.c:608
Snapshot RestoreSnapshot(char *start_address)
Definition snapmgr.c:1793
void AtSubCommit_Snapshot(int level)
Definition snapmgr.c:961
static void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
Definition snapmgr.c:235
void UpdateActiveSnapshotCommandId(void)
Definition snapmgr.c:744
static void SnapshotResetXmin(void)
Definition snapmgr.c:937
void RestoreTransactionSnapshot(Snapshot snapshot, PGPROC *source_pgproc)
Definition snapmgr.c:1853
static int parseIntFromText(const char *prefix, char **s, const char *filename)
Definition snapmgr.c:1307
static SnapshotData SecondarySnapshotData
Definition snapmgr.c:142
static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
Definition snapmgr.c:910
TransactionId TransactionXmin
Definition snapmgr.c:159
SnapshotData SnapshotAnyData
Definition snapmgr.c:145
bool HistoricSnapshotActive(void)
Definition snapmgr.c:1692
void ImportSnapshot(const char *idstr)
Definition snapmgr.c:1387
bool ActiveSnapshotSet(void)
Definition snapmgr.c:812
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:824
bool XactHasExportedSnapshots(void)
Definition snapmgr.c:1574
static const ResourceOwnerDesc snapshot_resowner_desc
Definition snapmgr.c:224
void DeleteAllExportedSnapshotFiles(void)
Definition snapmgr.c:1587
static void parseVxidFromText(const char *prefix, char **s, const char *filename, VirtualTransactionId *vxid)
Definition snapmgr.c:1357
static void FreeSnapshot(Snapshot snapshot)
Definition snapmgr.c:664
#define SNAPSHOT_EXPORT_DIR
Definition snapmgr.c:203
bool HaveRegisteredOrActiveSnapshot(void)
Definition snapmgr.c:1644
void InvalidateCatalogSnapshotConditionally(void)
Definition snapmgr.c:477
static SnapshotData CurrentSnapshotData
Definition snapmgr.c:141
SnapshotData SnapshotToastData
Definition snapmgr.c:146
static void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
Definition snapmgr.c:240
bool ThereAreNoPriorRegisteredSnapshots(void)
Definition snapmgr.c:1626
void SnapshotSetCommandId(CommandId curcid)
Definition snapmgr.c:490
void PopActiveSnapshot(void)
Definition snapmgr.c:775
void PushCopiedSnapshot(Snapshot snapshot)
Definition snapmgr.c:732
char * ExportSnapshot(Snapshot snapshot)
Definition snapmgr.c:1115
Size EstimateSnapshotSpace(Snapshot snapshot)
Definition snapmgr.c:1712
static ActiveSnapshotElt * ActiveSnapshot
Definition snapmgr.c:181
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition snapmgr.c:1669
static HTAB * tuplecid_data
Definition snapmgr.c:163
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition snapmgr.c:837
static TransactionId parseXidFromText(const char *prefix, char **s, const char *filename)
Definition snapmgr.c:1332
HTAB * HistoricSnapshotGetTupleCids(void)
Definition snapmgr.c:1698
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
Definition snapmgr.c:696
Snapshot GetNonHistoricCatalogSnapshot(Oid relid)
Definition snapmgr.c:407
static Snapshot CatalogSnapshot
Definition snapmgr.c:151
Snapshot GetActiveSnapshot(void)
Definition snapmgr.c:800
Datum pg_export_snapshot(PG_FUNCTION_ARGS)
Definition snapmgr.c:1292
static void ResOwnerReleaseSnapshot(Datum res)
Definition snapmgr.c:1968
struct SnapshotData * Snapshot
Definition snapshot.h:117
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
#define InvalidSnapshot
Definition snapshot.h:119
PGPROC * MyProc
Definition proc.c:67
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
struct ActiveSnapshotElt * as_next
Definition snapmgr.c:177
Snapshot as_snap
Definition snapmgr.c:175
Definition dirent.c:26
char * snapfile
Definition snapmgr.c:208
Snapshot snapshot
Definition snapmgr.c:209
Definition pg_list.h:54
Definition proc.h:179
TransactionId xmin
Definition proc.h:194
LocalTransactionId lxid
Definition proc.h:217
ProcNumber procNumber
Definition proc.h:212
struct PGPROC::@131 vxid
const char * name
Definition resowner.h:93
TransactionId xmax
Definition snapmgr.c:254
TransactionId xmin
Definition snapmgr.c:253
TransactionId xmin
Definition snapshot.h:153
int32 subxcnt
Definition snapshot.h:177
uint32 regd_count
Definition snapshot.h:201
uint32 active_count
Definition snapshot.h:200
CommandId curcid
Definition snapshot.h:183
pairingheap_node ph_node
Definition snapshot.h:202
uint32 xcnt
Definition snapshot.h:165
TransactionId * subxip
Definition snapshot.h:176
uint64 snapXactCompletionCount
Definition snapshot.h:209
TransactionId xmax
Definition snapshot.h:154
SnapshotType snapshot_type
Definition snapshot.h:140
TransactionId * xip
Definition snapshot.h:164
bool suboverflowed
Definition snapshot.h:178
bool takenDuringRecovery
Definition snapshot.h:180
LocalTransactionId localTransactionId
Definition lock.h:64
ProcNumber procNumber
Definition lock.h:63
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:162
bool RelationHasSysCache(Oid relid)
Definition syscache.c:737
bool RelationInvalidatesSnapshotsOnly(Oid relid)
Definition syscache.c:714
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
#define FirstNormalTransactionId
Definition transam.h:34
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
text * cstring_to_text(const char *s)
Definition varlena.c:181
#define fstat
Definition win32_port.h:73
int GetCurrentTransactionNestLevel(void)
Definition xact.c:930
bool XactReadOnly
Definition xact.c:83
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:442
int XactIsoLevel
Definition xact.c:80
bool IsSubTransaction(void)
Definition xact.c:5066
bool IsInParallelMode(void)
Definition xact.c:1090
int xactGetCommittedChildren(TransactionId **ptr)
Definition xact.c:5812
CommandId GetCurrentCommandId(bool used)
Definition xact.c:830
#define XACT_SERIALIZABLE
Definition xact.h:39
#define IsolationUsesXactSnapshot()
Definition xact.h:52
#define IsolationIsSerializable()
Definition xact.h:53