PostgreSQL Source Code  git master
snapmgr.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapmgr.c
4  * PostgreSQL snapshot manager
5  *
6  * We keep track of snapshots in two ways: those "registered" by resowner.c,
7  * and the "active snapshot" stack. All snapshots in either of them live in
8  * persistent memory. When a snapshot is no longer in any of these lists
9  * (tracked by separate refcounts on each snapshot), its memory can be freed.
10  *
11  * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12  * regd_count and list it in RegisteredSnapshots, but this reference is not
13  * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14  * to track this snapshot reference, but that introduces logical circularity
15  * and thus makes it impossible to clean up in a sane fashion. It's better to
16  * handle this reference as an internally-tracked registration, so that this
17  * module is entirely lower-level than ResourceOwners.
18  *
19  * Likewise, any snapshots that have been exported by pg_export_snapshot
20  * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21  * tracked by any resource owner.
22  *
23  * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24  * is valid, but is not tracked by any resource owner.
25  *
26  * The same is true for historic snapshots used during logical decoding;
27  * their lifetime is managed separately (as they live longer than one xact.c
28  * transaction).
29  *
30  * These arrangements let us reset MyProc->xmin when there are no snapshots
31  * referenced by this transaction, and advance it when the one with oldest
32  * Xmin is no longer referenced. For simplicity, however, only registered
33  * snapshots (not active snapshots) participate in tracking which one is oldest;
34  * we don't try to change MyProc->xmin except when the active-snapshot
35  * stack is empty.
36  *
37  *
38  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
39  * Portions Copyright (c) 1994, Regents of the University of California
40  *
41  * IDENTIFICATION
42  * src/backend/utils/time/snapmgr.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <sys/stat.h>
49 #include <unistd.h>
50 
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/xact.h"
54 #include "access/xlog.h"
55 #include "catalog/catalog.h"
56 #include "datatype/timestamp.h"
57 #include "lib/pairingheap.h"
58 #include "miscadmin.h"
59 #include "storage/predicate.h"
60 #include "storage/proc.h"
61 #include "storage/procarray.h"
62 #include "storage/sinval.h"
63 #include "storage/sinvaladt.h"
64 #include "storage/spin.h"
65 #include "utils/builtins.h"
66 #include "utils/memutils.h"
67 #include "utils/old_snapshot.h"
68 #include "utils/rel.h"
69 #include "utils/resowner_private.h"
70 #include "utils/snapmgr.h"
71 #include "utils/syscache.h"
72 #include "utils/timestamp.h"
73 
74 
75 /*
76  * GUC parameters
77  */
78 int old_snapshot_threshold; /* number of minutes, -1 disables */
79 
81 
82 
83 /*
84  * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
85  * mode, and to the latest one taken in a read-committed transaction.
86  * SecondarySnapshot is a snapshot that's always up-to-date as of the current
87  * instant, even in transaction-snapshot mode. It should only be used for
88  * special-purpose code (say, RI checking.) CatalogSnapshot points to an
89  * MVCC snapshot intended to be used for catalog scans; we must invalidate it
90  * whenever a system catalog change occurs.
91  *
92  * These SnapshotData structs are static to simplify memory allocation
93  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
94  */
100 
101 /* Pointers to valid snapshots */
102 static Snapshot CurrentSnapshot = NULL;
104 static Snapshot CatalogSnapshot = NULL;
106 
107 /*
108  * These are updated by GetSnapshotData. We initialize them this way
109  * for the convenience of TransactionIdIsInProgress: even in bootstrap
110  * mode, we don't want it to say that BootstrapTransactionId is in progress.
111  */
114 
115 /* (table, ctid) => (cmin, cmax) mapping during timetravel */
116 static HTAB *tuplecid_data = NULL;
117 
118 /*
119  * Elements of the active snapshot stack.
120  *
121  * Each element here accounts for exactly one active_count on SnapshotData.
122  *
123  * NB: the code assumes that elements in this list are in non-increasing
124  * order of as_level; also, the list must be NULL-terminated.
125  */
126 typedef struct ActiveSnapshotElt
127 {
129  int as_level;
132 
133 /* Top of the stack of active snapshots */
135 
136 /* Bottom of the stack of active snapshots */
138 
139 /*
140  * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
141  * quickly find the one with lowest xmin, to advance our MyProc->xmin.
142  */
143 static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
144  void *arg);
145 
146 static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
147 
148 /* first GetTransactionSnapshot call in a transaction? */
149 bool FirstSnapshotSet = false;
150 
151 /*
152  * Remember the serializable transaction snapshot, if any. We cannot trust
153  * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
154  * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
155  */
157 
158 /* Define pathname of exported-snapshot files */
159 #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
160 
161 /* Structure holding info about exported snapshot. */
162 typedef struct ExportedSnapshot
163 {
164  char *snapfile;
167 
168 /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
170 
171 /* Prototypes for local functions */
173 static Snapshot CopySnapshot(Snapshot snapshot);
174 static void FreeSnapshot(Snapshot snapshot);
175 static void SnapshotResetXmin(void);
176 
177 /*
178  * Snapshot fields to be serialized.
179  *
180  * Only these fields need to be sent to the cooperating backend; the
181  * remaining ones can (and must) be set by the receiver upon restore.
182  */
184 {
195 
196 Size
198 {
199  Size size;
200 
201  size = offsetof(OldSnapshotControlData, xid_by_minute);
202  if (old_snapshot_threshold > 0)
203  size = add_size(size, mul_size(sizeof(TransactionId),
205 
206  return size;
207 }
208 
209 /*
210  * Initialize for managing old snapshot detection.
211  */
212 void
214 {
215  bool found;
216 
217  /*
218  * Create or attach to the OldSnapshotControlData structure.
219  */
221  ShmemInitStruct("OldSnapshotControlData",
222  SnapMgrShmemSize(), &found);
223 
224  if (!found)
225  {
237  }
238 }
239 
240 /*
241  * GetTransactionSnapshot
242  * Get the appropriate snapshot for a new query in a transaction.
243  *
244  * Note that the return value may point at static storage that will be modified
245  * by future calls and by CommandCounterIncrement(). Callers should call
246  * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
247  * used very long.
248  */
249 Snapshot
251 {
252  /*
253  * Return historic snapshot if doing logical decoding. We'll never need a
254  * non-historic transaction snapshot in this (sub-)transaction, so there's
255  * no need to be careful to set one up for later calls to
256  * GetTransactionSnapshot().
257  */
259  {
261  return HistoricSnapshot;
262  }
263 
264  /* First call in transaction? */
265  if (!FirstSnapshotSet)
266  {
267  /*
268  * Don't allow catalog snapshot to be older than xact snapshot. Must
269  * do this first to allow the empty-heap Assert to succeed.
270  */
272 
274  Assert(FirstXactSnapshot == NULL);
275 
276  if (IsInParallelMode())
277  elog(ERROR,
278  "cannot take query snapshot during a parallel operation");
279 
280  /*
281  * In transaction-snapshot mode, the first snapshot must live until
282  * end of xact regardless of what the caller does with it, so we must
283  * make a copy of it rather than returning CurrentSnapshotData
284  * directly. Furthermore, if we're running in serializable mode,
285  * predicate.c needs to wrap the snapshot fetch in its own processing.
286  */
288  {
289  /* First, create the snapshot in CurrentSnapshotData */
292  else
294  /* Make a saved copy */
297  /* Mark it as "registered" in FirstXactSnapshot */
300  }
301  else
303 
304  FirstSnapshotSet = true;
305  return CurrentSnapshot;
306  }
307 
309  return CurrentSnapshot;
310 
311  /* Don't allow catalog snapshot to be older than xact snapshot. */
313 
315 
316  return CurrentSnapshot;
317 }
318 
319 /*
320  * GetLatestSnapshot
321  * Get a snapshot that is up-to-date as of the current instant,
322  * even if we are executing in transaction-snapshot mode.
323  */
324 Snapshot
326 {
327  /*
328  * We might be able to relax this, but nothing that could otherwise work
329  * needs it.
330  */
331  if (IsInParallelMode())
332  elog(ERROR,
333  "cannot update SecondarySnapshot during a parallel operation");
334 
335  /*
336  * So far there are no cases requiring support for GetLatestSnapshot()
337  * during logical decoding, but it wouldn't be hard to add if required.
338  */
340 
341  /* If first call in transaction, go ahead and set the xact snapshot */
342  if (!FirstSnapshotSet)
343  return GetTransactionSnapshot();
344 
346 
347  return SecondarySnapshot;
348 }
349 
350 /*
351  * GetOldestSnapshot
352  *
353  * Get the transaction's oldest known snapshot, as judged by the LSN.
354  * Will return NULL if there are no active or registered snapshots.
355  */
356 Snapshot
358 {
359  Snapshot OldestRegisteredSnapshot = NULL;
360  XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
361 
363  {
364  OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
366  RegisteredLSN = OldestRegisteredSnapshot->lsn;
367  }
368 
369  if (OldestActiveSnapshot != NULL)
370  {
372 
373  if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
375  }
376 
377  return OldestRegisteredSnapshot;
378 }
379 
380 /*
381  * GetCatalogSnapshot
382  * Get a snapshot that is sufficiently up-to-date for scan of the
383  * system catalog with the specified OID.
384  */
385 Snapshot
387 {
388  /*
389  * Return historic snapshot while we're doing logical decoding, so we can
390  * see the appropriate state of the catalog.
391  *
392  * This is the primary reason for needing to reset the system caches after
393  * finishing decoding.
394  */
396  return HistoricSnapshot;
397 
398  return GetNonHistoricCatalogSnapshot(relid);
399 }
400 
401 /*
402  * GetNonHistoricCatalogSnapshot
403  * Get a snapshot that is sufficiently up-to-date for scan of the system
404  * catalog with the specified OID, even while historic snapshots are set
405  * up.
406  */
407 Snapshot
409 {
410  /*
411  * If the caller is trying to scan a relation that has no syscache, no
412  * catcache invalidations will be sent when it is updated. For a few key
413  * relations, snapshot invalidations are sent instead. If we're trying to
414  * scan a relation for which neither catcache nor snapshot invalidations
415  * are sent, we must refresh the snapshot every time.
416  */
417  if (CatalogSnapshot &&
419  !RelationHasSysCache(relid))
421 
422  if (CatalogSnapshot == NULL)
423  {
424  /* Get new snapshot. */
426 
427  /*
428  * Make sure the catalog snapshot will be accounted for in decisions
429  * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
430  * that would result in making a physical copy, which is overkill; and
431  * it would also create a dependency on some resource owner, which we
432  * do not want for reasons explained at the head of this file. Instead
433  * just shove the CatalogSnapshot into the pairing heap manually. This
434  * has to be reversed in InvalidateCatalogSnapshot, of course.
435  *
436  * NB: it had better be impossible for this to throw error, since the
437  * CatalogSnapshot pointer is already valid.
438  */
440  }
441 
442  return CatalogSnapshot;
443 }
444 
445 /*
446  * InvalidateCatalogSnapshot
447  * Mark the current catalog snapshot, if any, as invalid
448  *
449  * We could change this API to allow the caller to provide more fine-grained
450  * invalidation details, so that a change to relation A wouldn't prevent us
451  * from using our cached snapshot to scan relation B, but so far there's no
452  * evidence that the CPU cycles we spent tracking such fine details would be
453  * well-spent.
454  */
455 void
457 {
458  if (CatalogSnapshot)
459  {
461  CatalogSnapshot = NULL;
463  }
464 }
465 
466 /*
467  * InvalidateCatalogSnapshotConditionally
468  * Drop catalog snapshot if it's the only one we have
469  *
470  * This is called when we are about to wait for client input, so we don't
471  * want to continue holding the catalog snapshot if it might mean that the
472  * global xmin horizon can't advance. However, if there are other snapshots
473  * still active or registered, the catalog snapshot isn't likely to be the
474  * oldest one, so we might as well keep it.
475  */
476 void
478 {
479  if (CatalogSnapshot &&
480  ActiveSnapshot == NULL &&
483 }
484 
485 /*
486  * SnapshotSetCommandId
487  * Propagate CommandCounterIncrement into the static snapshots, if set
488  */
489 void
491 {
492  if (!FirstSnapshotSet)
493  return;
494 
495  if (CurrentSnapshot)
496  CurrentSnapshot->curcid = curcid;
497  if (SecondarySnapshot)
498  SecondarySnapshot->curcid = curcid;
499  /* Should we do the same with CatalogSnapshot? */
500 }
501 
502 /*
503  * SetTransactionSnapshot
504  * Set the transaction's snapshot from an imported MVCC snapshot.
505  *
506  * Note that this is very closely tied to GetTransactionSnapshot --- it
507  * must take care of all the same considerations as the first-snapshot case
508  * in GetTransactionSnapshot.
509  */
510 static void
512  int sourcepid, PGPROC *sourceproc)
513 {
514  /* Caller should have checked this already */
516 
517  /* Better do this to ensure following Assert succeeds. */
519 
521  Assert(FirstXactSnapshot == NULL);
523 
524  /*
525  * Even though we are not going to use the snapshot it computes, we must
526  * call GetSnapshotData, for two reasons: (1) to be sure that
527  * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
528  * the state for GlobalVis*.
529  */
531 
532  /*
533  * Now copy appropriate fields from the source snapshot.
534  */
535  CurrentSnapshot->xmin = sourcesnap->xmin;
536  CurrentSnapshot->xmax = sourcesnap->xmax;
537  CurrentSnapshot->xcnt = sourcesnap->xcnt;
538  Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
539  if (sourcesnap->xcnt > 0)
540  memcpy(CurrentSnapshot->xip, sourcesnap->xip,
541  sourcesnap->xcnt * sizeof(TransactionId));
542  CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
543  Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
544  if (sourcesnap->subxcnt > 0)
545  memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
546  sourcesnap->subxcnt * sizeof(TransactionId));
549  /* NB: curcid should NOT be copied, it's a local matter */
550 
552 
553  /*
554  * Now we have to fix what GetSnapshotData did with MyProc->xmin and
555  * TransactionXmin. There is a race condition: to make sure we are not
556  * causing the global xmin to go backwards, we have to test that the
557  * source transaction is still running, and that has to be done
558  * atomically. So let procarray.c do it.
559  *
560  * Note: in serializable mode, predicate.c will do this a second time. It
561  * doesn't seem worth contorting the logic here to avoid two calls,
562  * especially since it's not clear that predicate.c *must* do this.
563  */
564  if (sourceproc != NULL)
565  {
567  ereport(ERROR,
568  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
569  errmsg("could not import the requested snapshot"),
570  errdetail("The source transaction is not running anymore.")));
571  }
572  else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
573  ereport(ERROR,
574  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
575  errmsg("could not import the requested snapshot"),
576  errdetail("The source process with PID %d is not running anymore.",
577  sourcepid)));
578 
579  /*
580  * In transaction-snapshot mode, the first snapshot must live until end of
581  * xact, so we must make a copy of it. Furthermore, if we're running in
582  * serializable mode, predicate.c needs to do its own processing.
583  */
585  {
588  sourcepid);
589  /* Make a saved copy */
592  /* Mark it as "registered" in FirstXactSnapshot */
595  }
596 
597  FirstSnapshotSet = true;
598 }
599 
600 /*
601  * CopySnapshot
602  * Copy the given snapshot.
603  *
604  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
605  * to 0. The returned snapshot has the copied flag set.
606  */
607 static Snapshot
609 {
610  Snapshot newsnap;
611  Size subxipoff;
612  Size size;
613 
614  Assert(snapshot != InvalidSnapshot);
615 
616  /* We allocate any XID arrays needed in the same palloc block. */
617  size = subxipoff = sizeof(SnapshotData) +
618  snapshot->xcnt * sizeof(TransactionId);
619  if (snapshot->subxcnt > 0)
620  size += snapshot->subxcnt * sizeof(TransactionId);
621 
623  memcpy(newsnap, snapshot, sizeof(SnapshotData));
624 
625  newsnap->regd_count = 0;
626  newsnap->active_count = 0;
627  newsnap->copied = true;
628  newsnap->snapXactCompletionCount = 0;
629 
630  /* setup XID array */
631  if (snapshot->xcnt > 0)
632  {
633  newsnap->xip = (TransactionId *) (newsnap + 1);
634  memcpy(newsnap->xip, snapshot->xip,
635  snapshot->xcnt * sizeof(TransactionId));
636  }
637  else
638  newsnap->xip = NULL;
639 
640  /*
641  * Setup subXID array. Don't bother to copy it if it had overflowed,
642  * though, because it's not used anywhere in that case. Except if it's a
643  * snapshot taken during recovery; all the top-level XIDs are in subxip as
644  * well in that case, so we mustn't lose them.
645  */
646  if (snapshot->subxcnt > 0 &&
647  (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
648  {
649  newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
650  memcpy(newsnap->subxip, snapshot->subxip,
651  snapshot->subxcnt * sizeof(TransactionId));
652  }
653  else
654  newsnap->subxip = NULL;
655 
656  return newsnap;
657 }
658 
659 /*
660  * FreeSnapshot
661  * Free the memory associated with a snapshot.
662  */
663 static void
665 {
666  Assert(snapshot->regd_count == 0);
667  Assert(snapshot->active_count == 0);
668  Assert(snapshot->copied);
669 
670  pfree(snapshot);
671 }
672 
673 /*
674  * PushActiveSnapshot
675  * Set the given snapshot as the current active snapshot
676  *
677  * If the passed snapshot is a statically-allocated one, or it is possibly
678  * subject to a future command counter update, create a new long-lived copy
679  * with active refcount=1. Otherwise, only increment the refcount.
680  */
681 void
683 {
685 }
686 
687 /*
688  * PushActiveSnapshotWithLevel
689  * Set the given snapshot as the current active snapshot
690  *
691  * Same as PushActiveSnapshot except that caller can specify the
692  * transaction nesting level that "owns" the snapshot. This level
693  * must not be deeper than the current top of the snapshot stack.
694  */
695 void
697 {
698  ActiveSnapshotElt *newactive;
699 
700  Assert(snap != InvalidSnapshot);
701  Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
702 
704 
705  /*
706  * Checking SecondarySnapshot is probably useless here, but it seems
707  * better to be sure.
708  */
709  if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied)
710  newactive->as_snap = CopySnapshot(snap);
711  else
712  newactive->as_snap = snap;
713 
714  newactive->as_next = ActiveSnapshot;
715  newactive->as_level = snap_level;
716 
717  newactive->as_snap->active_count++;
718 
719  ActiveSnapshot = newactive;
720  if (OldestActiveSnapshot == NULL)
722 }
723 
724 /*
725  * PushCopiedSnapshot
726  * As above, except forcibly copy the presented snapshot.
727  *
728  * This should be used when the ActiveSnapshot has to be modifiable, for
729  * example if the caller intends to call UpdateActiveSnapshotCommandId.
730  * The new snapshot will be released when popped from the stack.
731  */
732 void
734 {
735  PushActiveSnapshot(CopySnapshot(snapshot));
736 }
737 
738 /*
739  * UpdateActiveSnapshotCommandId
740  *
741  * Update the current CID of the active snapshot. This can only be applied
742  * to a snapshot that is not referenced elsewhere.
743  */
744 void
746 {
747  CommandId save_curcid,
748  curcid;
749 
750  Assert(ActiveSnapshot != NULL);
753 
754  /*
755  * Don't allow modification of the active snapshot during parallel
756  * operation. We share the snapshot to worker backends at the beginning
757  * of parallel operation, so any change to the snapshot can lead to
758  * inconsistencies. We have other defenses against
759  * CommandCounterIncrement, but there are a few places that call this
760  * directly, so we put an additional guard here.
761  */
762  save_curcid = ActiveSnapshot->as_snap->curcid;
763  curcid = GetCurrentCommandId(false);
764  if (IsInParallelMode() && save_curcid != curcid)
765  elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
766  ActiveSnapshot->as_snap->curcid = curcid;
767 }
768 
769 /*
770  * PopActiveSnapshot
771  *
772  * Remove the topmost snapshot from the active snapshot stack, decrementing the
773  * reference count, and free it if this was the last reference.
774  */
775 void
777 {
778  ActiveSnapshotElt *newstack;
779 
780  newstack = ActiveSnapshot->as_next;
781 
783 
785 
786  if (ActiveSnapshot->as_snap->active_count == 0 &&
789 
791  ActiveSnapshot = newstack;
792  if (ActiveSnapshot == NULL)
793  OldestActiveSnapshot = NULL;
794 
796 }
797 
798 /*
799  * GetActiveSnapshot
800  * Return the topmost snapshot in the Active stack.
801  */
802 Snapshot
804 {
805  Assert(ActiveSnapshot != NULL);
806 
807  return ActiveSnapshot->as_snap;
808 }
809 
810 /*
811  * ActiveSnapshotSet
812  * Return whether there is at least one snapshot in the Active stack
813  */
814 bool
816 {
817  return ActiveSnapshot != NULL;
818 }
819 
820 /*
821  * RegisterSnapshot
822  * Register a snapshot as being in use by the current resource owner
823  *
824  * If InvalidSnapshot is passed, it is not registered.
825  */
826 Snapshot
828 {
829  if (snapshot == InvalidSnapshot)
830  return InvalidSnapshot;
831 
833 }
834 
835 /*
836  * RegisterSnapshotOnOwner
837  * As above, but use the specified resource owner
838  */
839 Snapshot
841 {
842  Snapshot snap;
843 
844  if (snapshot == InvalidSnapshot)
845  return InvalidSnapshot;
846 
847  /* Static snapshot? Create a persistent copy */
848  snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
849 
850  /* and tell resowner.c about it */
852  snap->regd_count++;
853  ResourceOwnerRememberSnapshot(owner, snap);
854 
855  if (snap->regd_count == 1)
857 
858  return snap;
859 }
860 
861 /*
862  * UnregisterSnapshot
863  *
864  * Decrement the reference count of a snapshot, remove the corresponding
865  * reference from CurrentResourceOwner, and free the snapshot if no more
866  * references remain.
867  */
868 void
870 {
871  if (snapshot == NULL)
872  return;
873 
875 }
876 
877 /*
878  * UnregisterSnapshotFromOwner
879  * As above, but use the specified resource owner
880  */
881 void
883 {
884  if (snapshot == NULL)
885  return;
886 
887  Assert(snapshot->regd_count > 0);
889 
890  ResourceOwnerForgetSnapshot(owner, snapshot);
891 
892  snapshot->regd_count--;
893  if (snapshot->regd_count == 0)
895 
896  if (snapshot->regd_count == 0 && snapshot->active_count == 0)
897  {
898  FreeSnapshot(snapshot);
900  }
901 }
902 
903 /*
904  * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
905  * by xmin, so that the snapshot with smallest xmin is at the top.
906  */
907 static int
909 {
910  const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
911  const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
912 
913  if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
914  return 1;
915  else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
916  return -1;
917  else
918  return 0;
919 }
920 
921 /*
922  * SnapshotResetXmin
923  *
924  * If there are no more snapshots, we can reset our PGPROC->xmin to
925  * InvalidTransactionId. Note we can do this without locking because we assume
926  * that storing an Xid is atomic.
927  *
928  * Even if there are some remaining snapshots, we may be able to advance our
929  * PGPROC->xmin to some degree. This typically happens when a portal is
930  * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
931  * the active snapshot stack is empty; this allows us not to need to track
932  * which active snapshot is oldest.
933  *
934  * Note: it's tempting to use GetOldestSnapshot() here so that we can include
935  * active snapshots in the calculation. However, that compares by LSN not
936  * xmin so it's not entirely clear that it's the same thing. Also, we'd be
937  * critically dependent on the assumption that the bottommost active snapshot
938  * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are
939  * not actually critical, but this would be.)
940  */
941 static void
943 {
944  Snapshot minSnapshot;
945 
946  if (ActiveSnapshot != NULL)
947  return;
948 
950  {
952  return;
953  }
954 
955  minSnapshot = pairingheap_container(SnapshotData, ph_node,
957 
958  if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
959  MyProc->xmin = minSnapshot->xmin;
960 }
961 
962 /*
963  * AtSubCommit_Snapshot
964  */
965 void
967 {
968  ActiveSnapshotElt *active;
969 
970  /*
971  * Relabel the active snapshots set in this subtransaction as though they
972  * are owned by the parent subxact.
973  */
974  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
975  {
976  if (active->as_level < level)
977  break;
978  active->as_level = level - 1;
979  }
980 }
981 
982 /*
983  * AtSubAbort_Snapshot
984  * Clean up snapshots after a subtransaction abort
985  */
986 void
988 {
989  /* Forget the active snapshots set by this subtransaction */
990  while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
991  {
993 
995 
996  /*
997  * Decrement the snapshot's active count. If it's still registered or
998  * marked as active by an outer subtransaction, we can't free it yet.
999  */
1002 
1003  if (ActiveSnapshot->as_snap->active_count == 0 &&
1006 
1007  /* and free the stack element */
1009 
1010  ActiveSnapshot = next;
1011  if (ActiveSnapshot == NULL)
1012  OldestActiveSnapshot = NULL;
1013  }
1014 
1016 }
1017 
1018 /*
1019  * AtEOXact_Snapshot
1020  * Snapshot manager's cleanup function for end of transaction
1021  */
1022 void
1023 AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1024 {
1025  /*
1026  * In transaction-snapshot mode we must release our privately-managed
1027  * reference to the transaction snapshot. We must remove it from
1028  * RegisteredSnapshots to keep the check below happy. But we don't bother
1029  * to do FreeSnapshot, for two reasons: the memory will go away with
1030  * TopTransactionContext anyway, and if someone has left the snapshot
1031  * stacked as active, we don't want the code below to be chasing through a
1032  * dangling pointer.
1033  */
1034  if (FirstXactSnapshot != NULL)
1035  {
1039  }
1040  FirstXactSnapshot = NULL;
1041 
1042  /*
1043  * If we exported any snapshots, clean them up.
1044  */
1045  if (exportedSnapshots != NIL)
1046  {
1047  ListCell *lc;
1048 
1049  /*
1050  * Get rid of the files. Unlink failure is only a WARNING because (1)
1051  * it's too late to abort the transaction, and (2) leaving a leaked
1052  * file around has little real consequence anyway.
1053  *
1054  * We also need to remove the snapshots from RegisteredSnapshots to
1055  * prevent a warning below.
1056  *
1057  * As with the FirstXactSnapshot, we don't need to free resources of
1058  * the snapshot itself as it will go away with the memory context.
1059  */
1060  foreach(lc, exportedSnapshots)
1061  {
1062  ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1063 
1064  if (unlink(esnap->snapfile))
1065  elog(WARNING, "could not unlink file \"%s\": %m",
1066  esnap->snapfile);
1067 
1069  &esnap->snapshot->ph_node);
1070  }
1071 
1073  }
1074 
1075  /* Drop catalog snapshot if any */
1077 
1078  /* On commit, complain about leftover snapshots */
1079  if (isCommit)
1080  {
1081  ActiveSnapshotElt *active;
1082 
1084  elog(WARNING, "registered snapshots seem to remain after cleanup");
1085 
1086  /* complain about unpopped active snapshots */
1087  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1088  elog(WARNING, "snapshot %p still active", active);
1089  }
1090 
1091  /*
1092  * And reset our state. We don't need to free the memory explicitly --
1093  * it'll go away with TopTransactionContext.
1094  */
1095  ActiveSnapshot = NULL;
1096  OldestActiveSnapshot = NULL;
1098 
1099  CurrentSnapshot = NULL;
1100  SecondarySnapshot = NULL;
1101 
1102  FirstSnapshotSet = false;
1103 
1104  /*
1105  * During normal commit processing, we call ProcArrayEndTransaction() to
1106  * reset the MyProc->xmin. That call happens prior to the call to
1107  * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1108  */
1109  if (resetXmin)
1111 
1112  Assert(resetXmin || MyProc->xmin == 0);
1113 }
1114 
1115 
1116 /*
1117  * ExportSnapshot
1118  * Export the snapshot to a file so that other backends can import it.
1119  * Returns the token (the file name) that can be used to import this
1120  * snapshot.
1121  */
1122 char *
1124 {
1125  TransactionId topXid;
1126  TransactionId *children;
1127  ExportedSnapshot *esnap;
1128  int nchildren;
1129  int addTopXid;
1131  FILE *f;
1132  int i;
1133  MemoryContext oldcxt;
1134  char path[MAXPGPATH];
1135  char pathtmp[MAXPGPATH];
1136 
1137  /*
1138  * It's tempting to call RequireTransactionBlock here, since it's not very
1139  * useful to export a snapshot that will disappear immediately afterwards.
1140  * However, we haven't got enough information to do that, since we don't
1141  * know if we're at top level or not. For example, we could be inside a
1142  * plpgsql function that is going to fire off other transactions via
1143  * dblink. Rather than disallow perfectly legitimate usages, don't make a
1144  * check.
1145  *
1146  * Also note that we don't make any restriction on the transaction's
1147  * isolation level; however, importers must check the level if they are
1148  * serializable.
1149  */
1150 
1151  /*
1152  * Get our transaction ID if there is one, to include in the snapshot.
1153  */
1154  topXid = GetTopTransactionIdIfAny();
1155 
1156  /*
1157  * We cannot export a snapshot from a subtransaction because there's no
1158  * easy way for importers to verify that the same subtransaction is still
1159  * running.
1160  */
1161  if (IsSubTransaction())
1162  ereport(ERROR,
1163  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1164  errmsg("cannot export a snapshot from a subtransaction")));
1165 
1166  /*
1167  * We do however allow previous committed subtransactions to exist.
1168  * Importers of the snapshot must see them as still running, so get their
1169  * XIDs to add them to the snapshot.
1170  */
1171  nchildren = xactGetCommittedChildren(&children);
1172 
1173  /*
1174  * Generate file path for the snapshot. We start numbering of snapshots
1175  * inside the transaction from 1.
1176  */
1177  snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1179 
1180  /*
1181  * Copy the snapshot into TopTransactionContext, add it to the
1182  * exportedSnapshots list, and mark it pseudo-registered. We do this to
1183  * ensure that the snapshot's xmin is honored for the rest of the
1184  * transaction.
1185  */
1186  snapshot = CopySnapshot(snapshot);
1187 
1189  esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1190  esnap->snapfile = pstrdup(path);
1191  esnap->snapshot = snapshot;
1193  MemoryContextSwitchTo(oldcxt);
1194 
1195  snapshot->regd_count++;
1197 
1198  /*
1199  * Fill buf with a text serialization of the snapshot, plus identification
1200  * data about this transaction. The format expected by ImportSnapshot is
1201  * pretty rigid: each line must be fieldname:value.
1202  */
1203  initStringInfo(&buf);
1204 
1205  appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid);
1206  appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1207  appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1208  appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1209  appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1210 
1211  appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1212  appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1213 
1214  /*
1215  * We must include our own top transaction ID in the top-xid data, since
1216  * by definition we will still be running when the importing transaction
1217  * adopts the snapshot, but GetSnapshotData never includes our own XID in
1218  * the snapshot. (There must, therefore, be enough room to add it.)
1219  *
1220  * However, it could be that our topXid is after the xmax, in which case
1221  * we shouldn't include it because xip[] members are expected to be before
1222  * xmax. (We need not make the same check for subxip[] members, see
1223  * snapshot.h.)
1224  */
1225  addTopXid = (TransactionIdIsValid(topXid) &&
1226  TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1227  appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1228  for (i = 0; i < snapshot->xcnt; i++)
1229  appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1230  if (addTopXid)
1231  appendStringInfo(&buf, "xip:%u\n", topXid);
1232 
1233  /*
1234  * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1235  * we have to cope with possible overflow.
1236  */
1237  if (snapshot->suboverflowed ||
1238  snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1239  appendStringInfoString(&buf, "sof:1\n");
1240  else
1241  {
1242  appendStringInfoString(&buf, "sof:0\n");
1243  appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1244  for (i = 0; i < snapshot->subxcnt; i++)
1245  appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1246  for (i = 0; i < nchildren; i++)
1247  appendStringInfo(&buf, "sxp:%u\n", children[i]);
1248  }
1249  appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1250 
1251  /*
1252  * Now write the text representation into a file. We first write to a
1253  * ".tmp" filename, and rename to final filename if no error. This
1254  * ensures that no other backend can read an incomplete file
1255  * (ImportSnapshot won't allow it because of its valid-characters check).
1256  */
1257  snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1258  if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1259  ereport(ERROR,
1261  errmsg("could not create file \"%s\": %m", pathtmp)));
1262 
1263  if (fwrite(buf.data, buf.len, 1, f) != 1)
1264  ereport(ERROR,
1266  errmsg("could not write to file \"%s\": %m", pathtmp)));
1267 
1268  /* no fsync() since file need not survive a system crash */
1269 
1270  if (FreeFile(f))
1271  ereport(ERROR,
1273  errmsg("could not write to file \"%s\": %m", pathtmp)));
1274 
1275  /*
1276  * Now that we have written everything into a .tmp file, rename the file
1277  * to remove the .tmp suffix.
1278  */
1279  if (rename(pathtmp, path) < 0)
1280  ereport(ERROR,
1282  errmsg("could not rename file \"%s\" to \"%s\": %m",
1283  pathtmp, path)));
1284 
1285  /*
1286  * The basename of the file is what we return from pg_export_snapshot().
1287  * It's already in path in a textual format and we know that the path
1288  * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1289  * and pstrdup it so as not to return the address of a local variable.
1290  */
1291  return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1292 }
1293 
1294 /*
1295  * pg_export_snapshot
1296  * SQL-callable wrapper for ExportSnapshot.
1297  */
1298 Datum
1300 {
1301  char *snapshotName;
1302 
1303  snapshotName = ExportSnapshot(GetActiveSnapshot());
1304  PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1305 }
1306 
1307 
1308 /*
1309  * Parsing subroutines for ImportSnapshot: parse a line with the given
1310  * prefix followed by a value, and advance *s to the next line. The
1311  * filename is provided for use in error messages.
1312  */
1313 static int
1314 parseIntFromText(const char *prefix, char **s, const char *filename)
1315 {
1316  char *ptr = *s;
1317  int prefixlen = strlen(prefix);
1318  int val;
1319 
1320  if (strncmp(ptr, prefix, prefixlen) != 0)
1321  ereport(ERROR,
1322  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1323  errmsg("invalid snapshot data in file \"%s\"", filename)));
1324  ptr += prefixlen;
1325  if (sscanf(ptr, "%d", &val) != 1)
1326  ereport(ERROR,
1327  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1328  errmsg("invalid snapshot data in file \"%s\"", filename)));
1329  ptr = strchr(ptr, '\n');
1330  if (!ptr)
1331  ereport(ERROR,
1332  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1333  errmsg("invalid snapshot data in file \"%s\"", filename)));
1334  *s = ptr + 1;
1335  return val;
1336 }
1337 
1338 static TransactionId
1339 parseXidFromText(const char *prefix, char **s, const char *filename)
1340 {
1341  char *ptr = *s;
1342  int prefixlen = strlen(prefix);
1344 
1345  if (strncmp(ptr, prefix, prefixlen) != 0)
1346  ereport(ERROR,
1347  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1348  errmsg("invalid snapshot data in file \"%s\"", filename)));
1349  ptr += prefixlen;
1350  if (sscanf(ptr, "%u", &val) != 1)
1351  ereport(ERROR,
1352  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1353  errmsg("invalid snapshot data in file \"%s\"", filename)));
1354  ptr = strchr(ptr, '\n');
1355  if (!ptr)
1356  ereport(ERROR,
1357  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1358  errmsg("invalid snapshot data in file \"%s\"", filename)));
1359  *s = ptr + 1;
1360  return val;
1361 }
1362 
1363 static void
1364 parseVxidFromText(const char *prefix, char **s, const char *filename,
1365  VirtualTransactionId *vxid)
1366 {
1367  char *ptr = *s;
1368  int prefixlen = strlen(prefix);
1369 
1370  if (strncmp(ptr, prefix, prefixlen) != 0)
1371  ereport(ERROR,
1372  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1373  errmsg("invalid snapshot data in file \"%s\"", filename)));
1374  ptr += prefixlen;
1375  if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
1376  ereport(ERROR,
1377  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1378  errmsg("invalid snapshot data in file \"%s\"", filename)));
1379  ptr = strchr(ptr, '\n');
1380  if (!ptr)
1381  ereport(ERROR,
1382  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1383  errmsg("invalid snapshot data in file \"%s\"", filename)));
1384  *s = ptr + 1;
1385 }
1386 
1387 /*
1388  * ImportSnapshot
1389  * Import a previously exported snapshot. The argument should be a
1390  * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1391  * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1392  */
1393 void
1394 ImportSnapshot(const char *idstr)
1395 {
1396  char path[MAXPGPATH];
1397  FILE *f;
1398  struct stat stat_buf;
1399  char *filebuf;
1400  int xcnt;
1401  int i;
1402  VirtualTransactionId src_vxid;
1403  int src_pid;
1404  Oid src_dbid;
1405  int src_isolevel;
1406  bool src_readonly;
1407  SnapshotData snapshot;
1408 
1409  /*
1410  * Must be at top level of a fresh transaction. Note in particular that
1411  * we check we haven't acquired an XID --- if we have, it's conceivable
1412  * that the snapshot would show it as not running, making for very screwy
1413  * behavior.
1414  */
1415  if (FirstSnapshotSet ||
1417  IsSubTransaction())
1418  ereport(ERROR,
1419  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1420  errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1421 
1422  /*
1423  * If we are in read committed mode then the next query would execute with
1424  * a new snapshot thus making this function call quite useless.
1425  */
1427  ereport(ERROR,
1428  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1429  errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1430 
1431  /*
1432  * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1433  * this mainly to prevent reading arbitrary files.
1434  */
1435  if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1436  ereport(ERROR,
1437  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1438  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1439 
1440  /* OK, read the file */
1441  snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1442 
1443  f = AllocateFile(path, PG_BINARY_R);
1444  if (!f)
1445  ereport(ERROR,
1446  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1447  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1448 
1449  /* get the size of the file so that we know how much memory we need */
1450  if (fstat(fileno(f), &stat_buf))
1451  elog(ERROR, "could not stat file \"%s\": %m", path);
1452 
1453  /* and read the file into a palloc'd string */
1454  filebuf = (char *) palloc(stat_buf.st_size + 1);
1455  if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1456  elog(ERROR, "could not read file \"%s\": %m", path);
1457 
1458  filebuf[stat_buf.st_size] = '\0';
1459 
1460  FreeFile(f);
1461 
1462  /*
1463  * Construct a snapshot struct by parsing the file content.
1464  */
1465  memset(&snapshot, 0, sizeof(snapshot));
1466 
1467  parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1468  src_pid = parseIntFromText("pid:", &filebuf, path);
1469  /* we abuse parseXidFromText a bit here ... */
1470  src_dbid = parseXidFromText("dbid:", &filebuf, path);
1471  src_isolevel = parseIntFromText("iso:", &filebuf, path);
1472  src_readonly = parseIntFromText("ro:", &filebuf, path);
1473 
1474  snapshot.snapshot_type = SNAPSHOT_MVCC;
1475 
1476  snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1477  snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1478 
1479  snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1480 
1481  /* sanity-check the xid count before palloc */
1482  if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1483  ereport(ERROR,
1484  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1485  errmsg("invalid snapshot data in file \"%s\"", path)));
1486 
1487  snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1488  for (i = 0; i < xcnt; i++)
1489  snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1490 
1491  snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1492 
1493  if (!snapshot.suboverflowed)
1494  {
1495  snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1496 
1497  /* sanity-check the xid count before palloc */
1498  if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1499  ereport(ERROR,
1500  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1501  errmsg("invalid snapshot data in file \"%s\"", path)));
1502 
1503  snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1504  for (i = 0; i < xcnt; i++)
1505  snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1506  }
1507  else
1508  {
1509  snapshot.subxcnt = 0;
1510  snapshot.subxip = NULL;
1511  }
1512 
1513  snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1514 
1515  /*
1516  * Do some additional sanity checking, just to protect ourselves. We
1517  * don't trouble to check the array elements, just the most critical
1518  * fields.
1519  */
1520  if (!VirtualTransactionIdIsValid(src_vxid) ||
1521  !OidIsValid(src_dbid) ||
1522  !TransactionIdIsNormal(snapshot.xmin) ||
1523  !TransactionIdIsNormal(snapshot.xmax))
1524  ereport(ERROR,
1525  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1526  errmsg("invalid snapshot data in file \"%s\"", path)));
1527 
1528  /*
1529  * If we're serializable, the source transaction must be too, otherwise
1530  * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1531  * non-read-only transaction can't adopt a snapshot from a read-only
1532  * transaction, as predicate.c handles the cases very differently.
1533  */
1535  {
1536  if (src_isolevel != XACT_SERIALIZABLE)
1537  ereport(ERROR,
1538  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1539  errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1540  if (src_readonly && !XactReadOnly)
1541  ereport(ERROR,
1542  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1543  errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1544  }
1545 
1546  /*
1547  * We cannot import a snapshot that was taken in a different database,
1548  * because vacuum calculates OldestXmin on a per-database basis; so the
1549  * source transaction's xmin doesn't protect us from data loss. This
1550  * restriction could be removed if the source transaction were to mark its
1551  * xmin as being globally applicable. But that would require some
1552  * additional syntax, since that has to be known when the snapshot is
1553  * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1554  */
1555  if (src_dbid != MyDatabaseId)
1556  ereport(ERROR,
1557  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1558  errmsg("cannot import a snapshot from a different database")));
1559 
1560  /* OK, install the snapshot */
1561  SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1562 }
1563 
1564 /*
1565  * XactHasExportedSnapshots
1566  * Test whether current transaction has exported any snapshots.
1567  */
1568 bool
1570 {
1571  return (exportedSnapshots != NIL);
1572 }
1573 
1574 /*
1575  * DeleteAllExportedSnapshotFiles
1576  * Clean up any files that have been left behind by a crashed backend
1577  * that had exported snapshots before it died.
1578  *
1579  * This should be called during database startup or crash recovery.
1580  */
1581 void
1583 {
1584  char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1585  DIR *s_dir;
1586  struct dirent *s_de;
1587 
1588  /*
1589  * Problems in reading the directory, or unlinking files, are reported at
1590  * LOG level. Since we're running in the startup process, ERROR level
1591  * would prevent database start, and it's not important enough for that.
1592  */
1594 
1595  while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1596  {
1597  if (strcmp(s_de->d_name, ".") == 0 ||
1598  strcmp(s_de->d_name, "..") == 0)
1599  continue;
1600 
1601  snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1602 
1603  if (unlink(buf) != 0)
1604  ereport(LOG,
1606  errmsg("could not remove file \"%s\": %m", buf)));
1607  }
1608 
1609  FreeDir(s_dir);
1610 }
1611 
1612 /*
1613  * ThereAreNoPriorRegisteredSnapshots
1614  * Is the registered snapshot count less than or equal to one?
1615  *
1616  * Don't use this to settle important decisions. While zero registrations and
1617  * no ActiveSnapshot would confirm a certain idleness, the system makes no
1618  * guarantees about the significance of one registered snapshot.
1619  */
1620 bool
1622 {
1625  return true;
1626 
1627  return false;
1628 }
1629 
1630 /*
1631  * HaveRegisteredOrActiveSnapshots
1632  * Is there any registered or active snapshot?
1633  *
1634  * NB: Unless pushed or active, the cached catalog snapshot will not cause
1635  * this function to return true. That allows this function to be used in
1636  * checks enforcing a longer-lived snapshot.
1637  */
1638 bool
1640 {
1641  if (ActiveSnapshot != NULL)
1642  return true;
1643 
1644  /*
1645  * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1646  * removed at any time due to invalidation processing. If a snapshot has
1647  * been explicitly registered, more than one snapshot has to be in
 * RegisteredSnapshots.
1648  */
1649  if (CatalogSnapshot != NULL &&
1651  return false;
1652 
1654 }
1655 
1656 
1657 /*
1658  * Return a timestamp that is exactly on a minute boundary.
1659  *
1660  * If the argument is already aligned, return that value, otherwise move to
1661  * the next minute boundary following the given time.
1662  */
1663 static TimestampTz
1665 {
1666  TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
1667 
1668  return retval - (retval % USECS_PER_MINUTE);
1669 }
1670 
1671 /*
1672  * Get current timestamp for snapshots
1673  *
1674  * This is basically GetCurrentTimestamp(), but with a guarantee that
1675  * the result never moves backward.
1676  */
1679 {
1681 
1682  /*
1683  * Don't let time move backward; if it hasn't advanced, use the old value.
1684  */
1686  if (now <= oldSnapshotControl->current_timestamp)
1688  else
1691 
1692  return now;
1693 }
1694 
1695 /*
1696  * Get timestamp through which vacuum may have processed based on last stored
1697  * value for threshold_timestamp.
1698  *
1699  * XXX: So far, we never trust that a 64-bit value can be read atomically; if
1700  * that ever changes, we could get rid of the spinlock here.
1701  */
1704 {
1705  TimestampTz threshold_timestamp;
1706 
1708  threshold_timestamp = oldSnapshotControl->threshold_timestamp;
1710 
1711  return threshold_timestamp;
1712 }
1713 
1714 void
1716 {
1723 }
1724 
1725 /*
1726  * XXX: Magic to make the old_snapshot_threshold tests appear to be
1727  * "working". They are currently broken, and discussion of what to do about them is
1728  * ongoing. See
1729  * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
1730  */
1731 void
1733 {
1735 
1737 
1738  ts -= 5 * USECS_PER_SEC;
1739 
1743 }
1744 
1745 /*
1746  * If there is a valid mapping for the timestamp, set *xlimitp to
1747  * that. Returns whether there is such a mapping.
1748  */
1749 static bool
1751 {
1752  bool in_mapping = false;
1753 
1755 
1756  LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
1757 
1760  {
1761  int offset;
1762 
1763  offset = ((ts - oldSnapshotControl->head_timestamp)
1764  / USECS_PER_MINUTE);
1765  if (offset > oldSnapshotControl->count_used - 1)
1766  offset = oldSnapshotControl->count_used - 1;
1767  offset = (oldSnapshotControl->head_offset + offset)
1769 
1770  *xlimitp = oldSnapshotControl->xid_by_minute[offset];
1771 
1772  in_mapping = true;
1773  }
1774 
1775  LWLockRelease(OldSnapshotTimeMapLock);
1776 
1777  return in_mapping;
1778 }
1779 
1780 /*
1781  * TransactionIdLimitedForOldSnapshots
1782  *
1783  * Apply old snapshot limit. This is intended to be called for page pruning
1784  * and table vacuuming, to allow old_snapshot_threshold to override the normal
1785  * global xmin value. Actual testing for snapshot too old will be based on
1786  * whether a snapshot timestamp is prior to the threshold timestamp set in
1787  * this function.
1788  *
1789  * If the limited horizon allows a cleanup action that otherwise would not be
1790  * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
1791  * be called before that cleanup action.
1792  */
1793 bool
1795  Relation relation,
1796  TransactionId *limit_xid,
1797  TimestampTz *limit_ts)
1798 {
1799  TimestampTz ts;
1800  TransactionId xlimit = recentXmin;
1801  TransactionId latest_xmin;
1802  TimestampTz next_map_update_ts;
 /*
  * NOTE(review): threshold_timestamp is declared as TransactionId, yet below
  * it is loaded from oldSnapshotControl->threshold_timestamp and compared
  * with ts, which is a TimestampTz. If TransactionId is narrower than
  * TimestampTz, the loaded value is truncated and the "same bucket" test
  * cannot match as intended. Presumably this should be TimestampTz --
  * confirm and fix.
  */
1803  TransactionId threshold_timestamp;
1804  TransactionId threshold_xid;
1805 
1806  Assert(TransactionIdIsNormal(recentXmin));
 /* Both output pointers are mandatory; they are written on success. */
1808  Assert(limit_ts != NULL && limit_xid != NULL);
1809 
1810  /*
1811  * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
1812  * can't prune early when skipping WAL.
1813  */
1814  if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
1815  return false;
1816 
1818 
1820  latest_xmin = oldSnapshotControl->latest_xmin;
1821  next_map_update_ts = oldSnapshotControl->next_map_update;
1823 
1824  /*
1825  * Zero threshold always overrides to latest xmin, if valid. Without some
1826  * heuristic it will find its own snapshot too old on, for example, a
1827  * simple UPDATE -- which would make it useless for most testing, but
1828  * there is no principled way to ensure that it doesn't fail in this way.
1829  * Use a five-second delay to try to get useful testing behavior, but this
1830  * may need adjustment.
1831  */
1832  if (old_snapshot_threshold == 0)
1833  {
1834  if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
1835  && TransactionIdFollows(latest_xmin, xlimit))
1836  xlimit = latest_xmin;
1837 
1838  ts -= 5 * USECS_PER_SEC;
1839  }
1840  else
1841  {
1844 
1845  /* Check for fast exit without LW locking. */
1847  threshold_timestamp = oldSnapshotControl->threshold_timestamp;
1848  threshold_xid = oldSnapshotControl->threshold_xid;
1850 
1851  if (ts == threshold_timestamp)
1852  {
1853  /*
1854  * Current timestamp is in same bucket as the last limit that was
1855  * applied. Reuse.
1856  */
1857  xlimit = threshold_xid;
1858  }
1859  else if (ts == next_map_update_ts)
1860  {
1861  /*
1862  * FIXME: This branch is super iffy - but that should probably be
1863  * fixed separately.
1864  */
1865  xlimit = latest_xmin;
1866  }
1867  else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
1868  {
 /*
  * Nothing to do here: when the call in the condition returns true
  * it has already stored the mapped xid into xlimit.
  */
1869  }
1870 
1871  /*
1872  * Failsafe protection against vacuuming work of active transaction.
1873  *
1874  * This is not an assertion because we avoid the spinlock for
1875  * performance, leaving open the possibility that xlimit could advance
1876  * and be more current; but it seems prudent to apply this limit. It
1877  * might make pruning a tiny bit less aggressive than it could be, but
1878  * protects against data loss bugs.
1879  */
1880  if (TransactionIdIsNormal(latest_xmin)
1881  && TransactionIdPrecedes(latest_xmin, xlimit))
1882  xlimit = latest_xmin;
1883  }
1884 
 /*
  * Report a limit only if it is valid and does not lie before the
  * caller's recentXmin; otherwise the outputs are left untouched.
  */
1885  if (TransactionIdIsValid(xlimit) &&
1886  TransactionIdFollowsOrEquals(xlimit, recentXmin))
1887  {
1888  *limit_ts = ts;
1889  *limit_xid = xlimit;
1890 
1891  return true;
1892  }
1893 
1894  return false;
1895 }
1896 
1897 /*
1898  * Take care of the circular buffer that maps time to xid.
1899  */
1900 void
1902 {
1903  TimestampTz ts;
1904  TransactionId latest_xmin;
1905  TimestampTz update_ts;
1906  bool map_update_required = false;
1907 
1908  /* Never call this function when old snapshot checking is disabled. */
1910 
1911  ts = AlignTimestampToMinuteBoundary(whenTaken);
1912 
1913  /*
1914  * Keep track of the latest xmin seen by any process. Update mapping with
1915  * a new value when we have crossed a bucket boundary.
1916  */
1918  latest_xmin = oldSnapshotControl->latest_xmin;
1919  update_ts = oldSnapshotControl->next_map_update;
1920  if (ts > update_ts)
1921  {
1923  map_update_required = true;
1924  }
1925  if (TransactionIdFollows(xmin, latest_xmin))
1928 
1929  /* We only needed to update the most recent xmin value. */
1930  if (!map_update_required)
1931  return;
1932 
1933  /* No further tracking needed for 0 (used for testing). */
1934  if (old_snapshot_threshold == 0)
1935  return;
1936 
1937  /*
1938  * We don't want to do something stupid with unusual values, but we don't
1939  * want to litter the log with warnings or break otherwise normal
1940  * processing for this feature; so if something seems unreasonable, just
1941  * log at DEBUG level and return without doing anything.
1942  */
1943  if (whenTaken < 0)
1944  {
1945  elog(DEBUG1,
1946  "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
1947  (long) whenTaken);
1948  return;
1949  }
1950  if (!TransactionIdIsNormal(xmin))
1951  {
1952  elog(DEBUG1,
1953  "MaintainOldSnapshotTimeMapping called with xmin = %lu",
1954  (unsigned long) xmin);
1955  return;
1956  }
1957 
1958  LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
1959 
1965 
1966  if (oldSnapshotControl->count_used == 0)
1967  {
1968  /* set up first entry for empty mapping */
1972  oldSnapshotControl->xid_by_minute[0] = xmin;
1973  }
1974  else if (ts < oldSnapshotControl->head_timestamp)
1975  {
1976  /* old ts; log it at DEBUG */
1977  LWLockRelease(OldSnapshotTimeMapLock);
1978  elog(DEBUG1,
1979  "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
1980  (long) whenTaken);
1981  return;
1982  }
1983  else if (ts <= (oldSnapshotControl->head_timestamp +
1985  * USECS_PER_MINUTE)))
1986  {
1987  /* existing mapping; advance xid if possible */
1988  int bucket = (oldSnapshotControl->head_offset
1990  / USECS_PER_MINUTE))
1992 
1994  oldSnapshotControl->xid_by_minute[bucket] = xmin;
1995  }
1996  else
1997  {
1998  /* We need a new bucket, but it might not be the very next one. */
1999  int distance_to_new_tail;
2000  int distance_to_current_tail;
2001  int advance;
2002 
2003  /*
2004  * Our goal is for the new "tail" of the mapping, that is, the entry
2005  * which is newest and thus furthest from the "head" entry, to
2006  * correspond to "ts". Since there's one entry per minute, the
2007  * distance between the current head and the new tail is just the
2008  * number of minutes of difference between ts and the current
2009  * head_timestamp.
2010  *
2011  * The distance from the current head to the current tail is one less
2012  * than the number of entries in the mapping, because the entry at the
2013  * head_offset is for 0 minutes after head_timestamp.
2014  *
2015  * The difference between these two values is the number of minutes by
2016  * which we need to advance the mapping, either adding new entries or
2017  * rotating old ones out.
2018  */
2019  distance_to_new_tail =
2021  distance_to_current_tail =
2023  advance = distance_to_new_tail - distance_to_current_tail;
2024  Assert(advance > 0);
2025 
2026  if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
2027  {
2028  /* Advance is so far that all old data is junk; start over. */
2031  oldSnapshotControl->xid_by_minute[0] = xmin;
2033  }
2034  else
2035  {
2036  /* Store the new value in one or more buckets. */
2037  int i;
2038 
2039  for (i = 0; i < advance; i++)
2040  {
2042  {
2043  /* Map full and new value replaces old head. */
2044  int old_head = oldSnapshotControl->head_offset;
2045 
2046  if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
2048  else
2049  oldSnapshotControl->head_offset = old_head + 1;
2050  oldSnapshotControl->xid_by_minute[old_head] = xmin;
2052  }
2053  else
2054  {
2055  /* Extend map to unused entry. */
2056  int new_tail = (oldSnapshotControl->head_offset
2059 
2061  oldSnapshotControl->xid_by_minute[new_tail] = xmin;
2062  }
2063  }
2064  }
2065  }
2066 
2067  LWLockRelease(OldSnapshotTimeMapLock);
2068 }
2069 
2070 
2071 /*
2072  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
2073  * access to behave just like it did at a certain point in the past.
2074  *
2075  * Needed for logical decoding.
2076  */
2077 void
2078 SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
2079 {
2080  Assert(historic_snapshot != NULL);
2081 
2082  /* setup the timetravel snapshot */
2083  HistoricSnapshot = historic_snapshot;
2084 
2085  /* setup (cmin, cmax) lookup hash */
2086  tuplecid_data = tuplecids;
2087 }
2088 
2089 
2090 /*
2091  * Make catalog snapshots behave normally again.
2092  */
2093 void
2095 {
2096  HistoricSnapshot = NULL;
2097  tuplecid_data = NULL;
2098 }
2099 
2100 bool
2102 {
2103  return HistoricSnapshot != NULL;
2104 }
2105 
2106 HTAB *
2108 {
2110  return tuplecid_data;
2111 }
2112 
2113 /*
2114  * EstimateSnapshotSpace
2115  * Returns the size needed to store the given snapshot.
2116  *
2117  * We are exporting only required fields from the Snapshot, stored in
2118  * SerializedSnapshotData.
2119  */
2120 Size
2122 {
2123  Size size;
2124 
2125  Assert(snap != InvalidSnapshot);
2127 
2128  /* We allocate any XID arrays needed in the same palloc block. */
2129  size = add_size(sizeof(SerializedSnapshotData),
2130  mul_size(snap->xcnt, sizeof(TransactionId)));
2131  if (snap->subxcnt > 0 &&
2132  (!snap->suboverflowed || snap->takenDuringRecovery))
2133  size = add_size(size,
2134  mul_size(snap->subxcnt, sizeof(TransactionId)));
2135 
2136  return size;
2137 }
2138 
2139 /*
2140  * SerializeSnapshot
2141  * Dumps the serialized snapshot (extracted from given snapshot) onto the
2142  * memory location at start_address.
2143  */
2144 void
2145 SerializeSnapshot(Snapshot snapshot, char *start_address)
2146 {
2147  SerializedSnapshotData serialized_snapshot;
2148 
2149  Assert(snapshot->subxcnt >= 0);
2150 
2151  /* Copy all required fields */
2152  serialized_snapshot.xmin = snapshot->xmin;
2153  serialized_snapshot.xmax = snapshot->xmax;
2154  serialized_snapshot.xcnt = snapshot->xcnt;
2155  serialized_snapshot.subxcnt = snapshot->subxcnt;
2156  serialized_snapshot.suboverflowed = snapshot->suboverflowed;
2157  serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
2158  serialized_snapshot.curcid = snapshot->curcid;
2159  serialized_snapshot.whenTaken = snapshot->whenTaken;
2160  serialized_snapshot.lsn = snapshot->lsn;
2161 
2162  /*
2163  * Ignore the SubXID array if it has overflowed, unless the snapshot was
2164  * taken during recovery - in that case, top-level XIDs are in subxip as
2165  * well, and we mustn't lose them.
2166  */
2167  if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
2168  serialized_snapshot.subxcnt = 0;
2169 
2170  /* Copy struct to possibly-unaligned buffer */
2171  memcpy(start_address,
2172  &serialized_snapshot, sizeof(SerializedSnapshotData));
2173 
2174  /* Copy XID array */
2175  if (snapshot->xcnt > 0)
2176  memcpy((TransactionId *) (start_address +
2177  sizeof(SerializedSnapshotData)),
2178  snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
2179 
2180  /*
2181  * Copy SubXID array. Don't bother to copy it if it had overflowed,
2182  * though, because it's not used anywhere in that case. Except if it's a
2183  * snapshot taken during recovery; all the top-level XIDs are in subxip as
2184  * well in that case, so we mustn't lose them.
2185  */
2186  if (serialized_snapshot.subxcnt > 0)
2187  {
2188  Size subxipoff = sizeof(SerializedSnapshotData) +
2189  snapshot->xcnt * sizeof(TransactionId);
2190 
2191  memcpy((TransactionId *) (start_address + subxipoff),
2192  snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
2193  }
2194 }
2195 
2196 /*
2197  * RestoreSnapshot
2198  * Restore a serialized snapshot from the specified address.
2199  *
2200  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
2201  * to 0. The returned snapshot has the copied flag set.
2202  */
2203 Snapshot
2204 RestoreSnapshot(char *start_address)
2205 {
2206  SerializedSnapshotData serialized_snapshot;
2207  Size size;
2208  Snapshot snapshot;
2209  TransactionId *serialized_xids;
2210 
2211  memcpy(&serialized_snapshot, start_address,
2212  sizeof(SerializedSnapshotData));
2213  serialized_xids = (TransactionId *)
2214  (start_address + sizeof(SerializedSnapshotData));
2215 
2216  /* We allocate any XID arrays needed in the same palloc block. */
2217  size = sizeof(SnapshotData)
2218  + serialized_snapshot.xcnt * sizeof(TransactionId)
2219  + serialized_snapshot.subxcnt * sizeof(TransactionId);
2220 
2221  /* Copy all required fields */
2223  snapshot->snapshot_type = SNAPSHOT_MVCC;
2224  snapshot->xmin = serialized_snapshot.xmin;
2225  snapshot->xmax = serialized_snapshot.xmax;
2226  snapshot->xip = NULL;
2227  snapshot->xcnt = serialized_snapshot.xcnt;
2228  snapshot->subxip = NULL;
2229  snapshot->subxcnt = serialized_snapshot.subxcnt;
2230  snapshot->suboverflowed = serialized_snapshot.suboverflowed;
2231  snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
2232  snapshot->curcid = serialized_snapshot.curcid;
2233  snapshot->whenTaken = serialized_snapshot.whenTaken;
2234  snapshot->lsn = serialized_snapshot.lsn;
2235  snapshot->snapXactCompletionCount = 0;
2236 
2237  /* Copy XIDs, if present. */
2238  if (serialized_snapshot.xcnt > 0)
2239  {
2240  snapshot->xip = (TransactionId *) (snapshot + 1);
2241  memcpy(snapshot->xip, serialized_xids,
2242  serialized_snapshot.xcnt * sizeof(TransactionId));
2243  }
2244 
2245  /* Copy SubXIDs, if present. */
2246  if (serialized_snapshot.subxcnt > 0)
2247  {
2248  snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
2249  serialized_snapshot.xcnt;
2250  memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
2251  serialized_snapshot.subxcnt * sizeof(TransactionId));
2252  }
2253 
2254  /* Set the copied flag so that the caller will set refcounts correctly. */
2255  snapshot->regd_count = 0;
2256  snapshot->active_count = 0;
2257  snapshot->copied = true;
2258 
2259  return snapshot;
2260 }
2261 
2262 /*
2263  * Install a restored snapshot as the transaction snapshot.
2264  *
2265  * The second argument is of type void * so that snapmgr.h need not include
2266  * the declaration for PGPROC.
2267  */
2268 void
2269 RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
2270 {
2271  SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
2272 }
2273 
2274 /*
2275  * XidInMVCCSnapshot
2276  * Is the given XID still-in-progress according to the snapshot?
2277  *
2278  * Note: GetSnapshotData never stores either top xid or subxids of our own
2279  * backend into a snapshot, so these xids will not be reported as "running"
2280  * by this function. This is OK for current uses, because we always check
2281  * TransactionIdIsCurrentTransactionId first, except when it's known the
2282  * XID could not be ours anyway.
2283  */
2284 bool
2286 {
2287  uint32 i;
2288 
2289  /*
2290  * Make a quick range check to eliminate most XIDs without looking at the
2291  * xip arrays. Note that this is OK even if we convert a subxact XID to
2292  * its parent below, because a subxact with XID < xmin has surely also got
2293  * a parent with XID < xmin, while one with XID >= xmax must belong to a
2294  * parent that was not yet committed at the time of this snapshot.
2295  */
2296 
2297  /* Any xid < xmin is not in-progress */
2298  if (TransactionIdPrecedes(xid, snapshot->xmin))
2299  return false;
2300  /* Any xid >= xmax is in-progress */
2301  if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
2302  return true;
2303 
2304  /*
2305  * Snapshot information is stored slightly differently in snapshots taken
2306  * during recovery.
2307  */
2308  if (!snapshot->takenDuringRecovery)
2309  {
2310  /*
2311  * If the snapshot contains full subxact data, the fastest way to
2312  * check things is just to compare the given XID against both subxact
2313  * XIDs and top-level XIDs. If the snapshot overflowed, we have to
2314  * use pg_subtrans to convert a subxact XID to its parent XID, but
2315  * then we need only look at top-level XIDs not subxacts.
2316  */
2317  if (!snapshot->suboverflowed)
2318  {
2319  /* we have full data, so search subxip */
2320  int32 j;
2321 
2322  for (j = 0; j < snapshot->subxcnt; j++)
2323  {
2324  if (TransactionIdEquals(xid, snapshot->subxip[j]))
2325  return true;
2326  }
2327 
2328  /* not there, fall through to search xip[] */
2329  }
2330  else
2331  {
2332  /*
2333  * Snapshot overflowed, so convert xid to top-level. This is safe
2334  * because we eliminated too-old XIDs above.
2335  */
2336  xid = SubTransGetTopmostTransaction(xid);
2337 
2338  /*
2339  * If xid was indeed a subxact, we might now have an xid < xmin,
2340  * so recheck to avoid an array scan. No point in rechecking
2341  * xmax.
2342  */
2343  if (TransactionIdPrecedes(xid, snapshot->xmin))
2344  return false;
2345  }
2346 
2347  for (i = 0; i < snapshot->xcnt; i++)
2348  {
2349  if (TransactionIdEquals(xid, snapshot->xip[i]))
2350  return true;
2351  }
2352  }
2353  else
2354  {
2355  int32 j;
2356 
2357  /*
2358  * In recovery we store all xids in the subxact array because it is by
2359  * far the bigger array, and we mostly don't know which xids are
2360  * top-level and which are subxacts. The xip array is empty.
2361  *
2362  * We start by searching subtrans, if we overflowed.
2363  */
2364  if (snapshot->suboverflowed)
2365  {
2366  /*
2367  * Snapshot overflowed, so convert xid to top-level. This is safe
2368  * because we eliminated too-old XIDs above.
2369  */
2370  xid = SubTransGetTopmostTransaction(xid);
2371 
2372  /*
2373  * If xid was indeed a subxact, we might now have an xid < xmin,
2374  * so recheck to avoid an array scan. No point in rechecking
2375  * xmax.
2376  */
2377  if (TransactionIdPrecedes(xid, snapshot->xmin))
2378  return false;
2379  }
2380 
2381  /*
2382  * We now have either a top-level xid higher than xmin or an
2383  * indeterminate xid. We don't know whether it's top level or subxact
2384  * but it doesn't matter. If it's present, the xid is visible.
2385  */
2386  for (j = 0; j < snapshot->subxcnt; j++)
2387  {
2388  if (TransactionIdEquals(xid, snapshot->subxip[j]))
2389  return true;
2390  }
2391  }
2392 
2393  return false;
2394 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1574
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1538
static int32 next
Definition: blutils.c:219
unsigned int uint32
Definition: c.h:452
#define PG_BINARY_R
Definition: c.h:1281
#define offsetof(type, field)
Definition: c.h:738
signed int int32
Definition: c.h:440
#define PG_BINARY_W
Definition: c.h:1282
uint32 CommandId
Definition: c.h:612
uint32 TransactionId
Definition: c.h:598
#define OidIsValid(objectId)
Definition: c.h:721
size_t Size
Definition: c.h:551
int64 TimestampTz
Definition: timestamp.h:39
#define USECS_PER_SEC
Definition: timestamp.h:133
#define USECS_PER_MINUTE
Definition: timestamp.h:132
int errcode_for_file_access(void)
Definition: elog.c:716
int errdetail(const char *fmt,...)
Definition: elog.c:1037
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define LOG
Definition: elog.h:25
#define WARNING
Definition: elog.h:30
#define DEBUG1
Definition: elog.h:24
#define ERROR
Definition: elog.h:33
#define ereport(elevel,...)
Definition: elog.h:143
int FreeDir(DIR *dir)
Definition: fd.c:2777
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2398
int FreeFile(FILE *file)
Definition: fd.c:2597
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition: fd.c:2740
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2659
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int MyProcPid
Definition: globals.c:44
Oid MyDatabaseId
Definition: globals.c:89
long val
Definition: informix.c:664
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:72
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1196
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1800
@ LW_SHARED
Definition: lwlock.h:105
@ LW_EXCLUSIVE
Definition: lwlock.h:104
MemoryContext TopTransactionContext
Definition: mcxt.c:53
char * pstrdup(const char *in)
Definition: mcxt.c:1305
void pfree(void *pointer)
Definition: mcxt.c:1175
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void * palloc(Size size)
Definition: mcxt.c:1068
#define InvalidPid
Definition: miscadmin.h:32
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:170
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:112
pairingheap_node * pairingheap_first(pairingheap *heap)
Definition: pairingheap.c:130
#define pairingheap_is_empty(h)
Definition: pairingheap.h:96
#define pairingheap_is_singular(h)
Definition: pairingheap.h:99
#define pairingheap_container(type, membername, ptr)
Definition: pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition: pairingheap.h:51
#define pairingheap_reset(h)
Definition: pairingheap.h:93
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
void * arg
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:94
#define lfirst(lc)
Definition: pg_list.h:170
static int list_length(const List *l)
Definition: pg_list.h:150
#define NIL
Definition: pg_list.h:66
static char * buf
Definition: pg_test_fsync.c:67
#define snprintf
Definition: port.h:225
uintptr_t Datum
Definition: postgres.h:411
unsigned int Oid
Definition: postgres_ext.h:31
void SetSerializableTransactionSnapshot(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition: predicate.c:1721
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition: predicate.c:1681
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:2081
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2207
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2070
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2649
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2570
#define RelationNeedsWAL(relation)
Definition: rel.h:613
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1250
void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1259
void ResourceOwnerEnlargeSnapshots(ResourceOwner owner)
Definition: resowner.c:1239
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
static Snapshot HistoricSnapshot
Definition: snapmgr.c:105
void SnapshotTooOldMagicForTest(void)
Definition: snapmgr.c:1732
static Snapshot FirstXactSnapshot
Definition: snapmgr.c:156
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1901
TransactionId RecentXmin
Definition: snapmgr.c:113
SnapshotData CatalogSnapshotData
Definition: snapmgr.c:97
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:882
static void SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, int sourcepid, PGPROC *sourceproc)
Definition: snapmgr.c:511
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1678
void AtSubAbort_Snapshot(int level)
Definition: snapmgr.c:987
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2145
void SnapMgrInit(void)
Definition: snapmgr.c:213
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2121
SnapshotData SnapshotSelfData
Definition: snapmgr.c:98
void AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Definition: snapmgr.c:1023
struct ActiveSnapshotElt ActiveSnapshotElt
static Snapshot CurrentSnapshot
Definition: snapmgr.c:102
static Snapshot SecondarySnapshot
Definition: snapmgr.c:103
bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
Definition: snapmgr.c:2285
static List * exportedSnapshots
Definition: snapmgr.c:169
static pairingheap RegisteredSnapshots
Definition: snapmgr.c:146
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts)
Definition: snapmgr.c:1664
bool FirstSnapshotSet
Definition: snapmgr.c:149
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
Snapshot GetLatestSnapshot(void)
Definition: snapmgr.c:325
void TeardownHistoricSnapshot(bool is_error)
Definition: snapmgr.c:2094
Snapshot GetCatalogSnapshot(Oid relid)
Definition: snapmgr.c:386
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:869
static Snapshot CopySnapshot(Snapshot snapshot)
Definition: snapmgr.c:608
static ActiveSnapshotElt * OldestActiveSnapshot
Definition: snapmgr.c:137
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2204
static bool GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
Definition: snapmgr.c:1750
HTAB * HistoricSnapshotGetTupleCids(void)
Definition: snapmgr.c:2107
void AtSubCommit_Snapshot(int level)
Definition: snapmgr.c:966
void UpdateActiveSnapshotCommandId(void)
Definition: snapmgr.c:745
static void SnapshotResetXmin(void)
Definition: snapmgr.c:942
static int parseIntFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1314
static SnapshotData SecondarySnapshotData
Definition: snapmgr.c:96
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1123
static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
Definition: snapmgr.c:908
TransactionId TransactionXmin
Definition: snapmgr.c:112
SnapshotData SnapshotAnyData
Definition: snapmgr.c:99
bool HistoricSnapshotActive(void)
Definition: snapmgr.c:2101
void ImportSnapshot(const char *idstr)
Definition: snapmgr.c:1394
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:815
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:827
bool XactHasExportedSnapshots(void)
Definition: snapmgr.c:1569
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1582
static void parseVxidFromText(const char *prefix, char **s, const char *filename, VirtualTransactionId *vxid)
Definition: snapmgr.c:1364
static void FreeSnapshot(Snapshot snapshot)
Definition: snapmgr.c:664
#define SNAPSHOT_EXPORT_DIR
Definition: snapmgr.c:159
bool HaveRegisteredOrActiveSnapshot(void)
Definition: snapmgr.c:1639
void InvalidateCatalogSnapshotConditionally(void)
Definition: snapmgr.c:477
static SnapshotData CurrentSnapshotData
Definition: snapmgr.c:95
bool ThereAreNoPriorRegisteredSnapshots(void)
Definition: snapmgr.c:1621
void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
Definition: snapmgr.c:2269
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1794
void SnapshotSetCommandId(CommandId curcid)
Definition: snapmgr.c:490
void PushActiveSnapshotWithLevel(Snapshot snap, int snap_level)
Definition: snapmgr.c:696
struct SerializedSnapshotData SerializedSnapshotData
void PopActiveSnapshot(void)
Definition: snapmgr.c:776
void PushCopiedSnapshot(Snapshot snapshot)
Definition: snapmgr.c:733
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:682
static ActiveSnapshotElt * ActiveSnapshot
Definition: snapmgr.c:134
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition: snapmgr.c:2078
TimestampTz GetOldSnapshotThresholdTimestamp(void)
Definition: snapmgr.c:1703
int old_snapshot_threshold
Definition: snapmgr.c:78
static HTAB * tuplecid_data
Definition: snapmgr.c:116
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:840
static TransactionId parseXidFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1339
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:456
Snapshot GetNonHistoricCatalogSnapshot(Oid relid)
Definition: snapmgr.c:408
struct ExportedSnapshot ExportedSnapshot
volatile OldSnapshotControlData * oldSnapshotControl
Definition: snapmgr.c:80
Size SnapMgrShmemSize(void)
Definition: snapmgr.c:197
Snapshot GetOldestSnapshot(void)
Definition: snapmgr.c:357
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1715
static Snapshot CatalogSnapshot
Definition: snapmgr.c:104
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:803
Datum pg_export_snapshot(PG_FUNCTION_ARGS)
Definition: snapmgr.c:1299
#define RelationAllowsEarlyPruning(rel)
Definition: snapmgr.h:38
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:101
#define OLD_SNAPSHOT_TIME_MAP_ENTRIES
Definition: snapmgr.h:32
struct SnapshotData * Snapshot
Definition: snapshot.h:121
struct SnapshotData SnapshotData
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
#define InvalidSnapshot
Definition: snapshot.h:123
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:68
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ActiveSnapshotElt * as_next
Definition: snapmgr.c:130
Snapshot as_snap
Definition: snapmgr.c:128
Definition: dirent.c:26
char * snapfile
Definition: snapmgr.c:164
Snapshot snapshot
Definition: snapmgr.c:165
Definition: dynahash.c:220
Definition: pg_list.h:52
TimestampTz next_map_update
Definition: old_snapshot.h:34
TimestampTz threshold_timestamp
Definition: old_snapshot.h:36
TransactionId latest_xmin
Definition: old_snapshot.h:33
TimestampTz head_timestamp
Definition: old_snapshot.h:68
TimestampTz current_timestamp
Definition: old_snapshot.h:31
TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER]
Definition: old_snapshot.h:70
TransactionId threshold_xid
Definition: old_snapshot.h:37
Definition: proc.h:160
TransactionId xmin
Definition: proc.h:176
LocalTransactionId lxid
Definition: proc.h:181
BackendId backendId
Definition: proc.h:191
TransactionId xmax
Definition: snapmgr.c:186
TimestampTz whenTaken
Definition: snapmgr.c:192
TransactionId xmin
Definition: snapmgr.c:185
TransactionId xmin
Definition: snapshot.h:157
int32 subxcnt
Definition: snapshot.h:181
bool copied
Definition: snapshot.h:185
uint32 regd_count
Definition: snapshot.h:205
uint32 active_count
Definition: snapshot.h:204
CommandId curcid
Definition: snapshot.h:187
pairingheap_node ph_node
Definition: snapshot.h:206
TimestampTz whenTaken
Definition: snapshot.h:208
uint32 xcnt
Definition: snapshot.h:169
TransactionId * subxip
Definition: snapshot.h:180
uint64 snapXactCompletionCount
Definition: snapshot.h:216
TransactionId xmax
Definition: snapshot.h:158
XLogRecPtr lsn
Definition: snapshot.h:209
SnapshotType snapshot_type
Definition: snapshot.h:144
TransactionId * xip
Definition: snapshot.h:168
bool suboverflowed
Definition: snapshot.h:182
bool takenDuringRecovery
Definition: snapshot.h:184
LocalTransactionId localTransactionId
Definition: lock.h:67
BackendId backendId
Definition: lock.h:66
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
__int64 st_size
Definition: win32_port.h:273
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
bool RelationHasSysCache(Oid relid)
Definition: syscache.c:1553
bool RelationInvalidatesSnapshotsOnly(Oid relid)
Definition: syscache.c:1530
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:273
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:292
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:307
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:322
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define FirstNormalTransactionId
Definition: transam.h:34
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
text * cstring_to_text(const char *s)
Definition: varlena.c:188
#define fstat
Definition: win32_port.h:282
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:910
bool XactReadOnly
Definition: xact.c:81
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:428
int XactIsoLevel
Definition: xact.c:78
bool IsSubTransaction(void)
Definition: xact.c:4839
bool IsInParallelMode(void)
Definition: xact.c:1065
int xactGetCommittedChildren(TransactionId **ptr)
Definition: xact.c:5563
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:814
#define XACT_SERIALIZABLE
Definition: xact.h:39
#define IsolationUsesXactSnapshot()
Definition: xact.h:51
#define IsolationIsSerializable()
Definition: xact.h:52
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28