PostgreSQL Source Code  git master
snapmgr.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapmgr.c
4  * PostgreSQL snapshot manager
5  *
6  * We keep track of snapshots in two ways: those "registered" by resowner.c,
7  * and the "active snapshot" stack. All snapshots in either of them live in
8  * persistent memory. When a snapshot is no longer in any of these lists
9  * (tracked by separate refcounts on each snapshot), its memory can be freed.
10  *
11  * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12  * regd_count and list it in RegisteredSnapshots, but this reference is not
13  * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14  * to track this snapshot reference, but that introduces logical circularity
15  * and thus makes it impossible to clean up in a sane fashion. It's better to
16  * handle this reference as an internally-tracked registration, so that this
17  * module is entirely lower-level than ResourceOwners.
18  *
19  * Likewise, any snapshots that have been exported by pg_export_snapshot
20  * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21  * tracked by any resource owner.
22  *
23  * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24  * is valid, but is not tracked by any resource owner.
25  *
26  * The same is true for historic snapshots used during logical decoding;
27  * their lifetime is managed separately (as they live longer than one xact.c
28  * transaction).
29  *
30  * These arrangements let us reset MyProc->xmin when there are no snapshots
31  * referenced by this transaction, and advance it when the one with oldest
32  * Xmin is no longer referenced. For simplicity, however, only registered
33  * snapshots, not active snapshots, participate in tracking which one is oldest;
34  * we don't try to change MyProc->xmin except when the active-snapshot
35  * stack is empty.
36  *
37  *
38  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
39  * Portions Copyright (c) 1994, Regents of the University of California
40  *
41  * IDENTIFICATION
42  * src/backend/utils/time/snapmgr.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <sys/stat.h>
49 #include <unistd.h>
50 
51 #include "access/subtrans.h"
52 #include "access/transam.h"
53 #include "access/xact.h"
54 #include "access/xlog.h"
55 #include "catalog/catalog.h"
56 #include "datatype/timestamp.h"
57 #include "lib/pairingheap.h"
58 #include "miscadmin.h"
59 #include "port/pg_lfind.h"
60 #include "storage/predicate.h"
61 #include "storage/proc.h"
62 #include "storage/procarray.h"
63 #include "storage/sinval.h"
64 #include "storage/sinvaladt.h"
65 #include "storage/spin.h"
66 #include "utils/builtins.h"
67 #include "utils/memutils.h"
68 #include "utils/old_snapshot.h"
69 #include "utils/rel.h"
70 #include "utils/resowner_private.h"
71 #include "utils/snapmgr.h"
72 #include "utils/syscache.h"
73 #include "utils/timestamp.h"
74 
75 
76 /*
77  * GUC parameters
78  */
79 int old_snapshot_threshold; /* number of minutes, -1 disables */
80 
82 
83 
84 /*
85  * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
86  * mode, and to the latest one taken in a read-committed transaction.
87  * SecondarySnapshot is a snapshot that's always up-to-date as of the current
88  * instant, even in transaction-snapshot mode. It should only be used for
89  * special-purpose code (say, RI checking.) CatalogSnapshot points to an
90  * MVCC snapshot intended to be used for catalog scans; we must invalidate it
91  * whenever a system catalog change occurs.
92  *
93  * These SnapshotData structs are static to simplify memory allocation
94  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
95  */
101 
102 /* Pointers to valid snapshots */
103 static Snapshot CurrentSnapshot = NULL;
105 static Snapshot CatalogSnapshot = NULL;
107 
108 /*
109  * These are updated by GetSnapshotData. We initialize them this way
110  * for the convenience of TransactionIdIsInProgress: even in bootstrap
111  * mode, we don't want it to say that BootstrapTransactionId is in progress.
112  */
115 
116 /* (table, ctid) => (cmin, cmax) mapping during timetravel */
117 static HTAB *tuplecid_data = NULL;
118 
119 /*
120  * Elements of the active snapshot stack.
121  *
122  * Each element here accounts for exactly one active_count on SnapshotData.
123  *
124  * NB: the code assumes that elements in this list are in non-increasing
125  * order of as_level; also, the list must be NULL-terminated.
126  */
127 typedef struct ActiveSnapshotElt
128 {
130  int as_level;
133 
134 /* Top of the stack of active snapshots */
136 
137 /* Bottom of the stack of active snapshots */
139 
140 /*
141  * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
142  * quickly find the one with lowest xmin, to advance our MyProc->xmin.
143  */
144 static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
145  void *arg);
146 
147 static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
148 
149 /* first GetTransactionSnapshot call in a transaction? */
150 bool FirstSnapshotSet = false;
151 
152 /*
153  * Remember the serializable transaction snapshot, if any. We cannot trust
154  * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
155  * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
156  */
158 
159 /* Define pathname of exported-snapshot files */
160 #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
161 
162 /* Structure holding info about exported snapshot. */
163 typedef struct ExportedSnapshot
164 {
165  char *snapfile;
168 
169 /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
171 
172 /* Prototypes for local functions */
174 static Snapshot CopySnapshot(Snapshot snapshot);
175 static void FreeSnapshot(Snapshot snapshot);
176 static void SnapshotResetXmin(void);
177 
178 /*
179  * Snapshot fields to be serialized.
180  *
181  * Only these fields need to be sent to the cooperating backend; the
182  * remaining ones can (and must) be set by the receiver upon restore.
183  */
185 {
196 
197 Size
199 {
200  Size size;
201 
202  size = offsetof(OldSnapshotControlData, xid_by_minute);
203  if (old_snapshot_threshold > 0)
204  size = add_size(size, mul_size(sizeof(TransactionId),
206 
207  return size;
208 }
209 
210 /*
211  * Initialize for managing old snapshot detection.
212  */
213 void
215 {
216  bool found;
217 
218  /*
219  * Create or attach to the OldSnapshotControlData structure.
220  */
222  ShmemInitStruct("OldSnapshotControlData",
223  SnapMgrShmemSize(), &found);
224 
225  if (!found)
226  {
238  }
239 }
240 
241 /*
242  * GetTransactionSnapshot
243  * Get the appropriate snapshot for a new query in a transaction.
244  *
245  * Note that the return value may point at static storage that will be modified
246  * by future calls and by CommandCounterIncrement(). Callers should call
247  * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
248  * used for very long.
249  */
250 Snapshot
252 {
253  /*
254  * Return historic snapshot if doing logical decoding. We'll never need a
255  * non-historic transaction snapshot in this (sub-)transaction, so there's
256  * no need to be careful to set one up for later calls to
257  * GetTransactionSnapshot().
258  */
260  {
262  return HistoricSnapshot;
263  }
264 
265  /* First call in transaction? */
266  if (!FirstSnapshotSet)
267  {
268  /*
269  * Don't allow catalog snapshot to be older than xact snapshot. Must
270  * do this first to allow the empty-heap Assert to succeed.
271  */
273 
275  Assert(FirstXactSnapshot == NULL);
276 
277  if (IsInParallelMode())
278  elog(ERROR,
279  "cannot take query snapshot during a parallel operation");
280 
281  /*
282  * In transaction-snapshot mode, the first snapshot must live until
283  * end of xact regardless of what the caller does with it, so we must
284  * make a copy of it rather than returning CurrentSnapshotData
285  * directly. Furthermore, if we're running in serializable mode,
286  * predicate.c needs to wrap the snapshot fetch in its own processing.
287  */
289  {
290  /* First, create the snapshot in CurrentSnapshotData */
293  else
295  /* Make a saved copy */
298  /* Mark it as "registered" in FirstXactSnapshot */
301  }
302  else
304 
305  FirstSnapshotSet = true;
306  return CurrentSnapshot;
307  }
308 
310  return CurrentSnapshot;
311 
312  /* Don't allow catalog snapshot to be older than xact snapshot. */
314 
316 
317  return CurrentSnapshot;
318 }
319 
320 /*
321  * GetLatestSnapshot
322  * Get a snapshot that is up-to-date as of the current instant,
323  * even if we are executing in transaction-snapshot mode.
324  */
325 Snapshot
327 {
328  /*
329  * We might be able to relax this, but nothing that could otherwise work
330  * needs it.
331  */
332  if (IsInParallelMode())
333  elog(ERROR,
334  "cannot update SecondarySnapshot during a parallel operation");
335 
336  /*
337  * So far there are no cases requiring support for GetLatestSnapshot()
338  * during logical decoding, but it wouldn't be hard to add if required.
339  */
341 
342  /* If first call in transaction, go ahead and set the xact snapshot */
343  if (!FirstSnapshotSet)
344  return GetTransactionSnapshot();
345 
347 
348  return SecondarySnapshot;
349 }
350 
351 /*
352  * GetOldestSnapshot
353  *
354  * Get the transaction's oldest known snapshot, as judged by the LSN.
355  * Will return NULL if there are no active or registered snapshots.
356  */
357 Snapshot
359 {
360  Snapshot OldestRegisteredSnapshot = NULL;
361  XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
362 
364  {
365  OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
367  RegisteredLSN = OldestRegisteredSnapshot->lsn;
368  }
369 
370  if (OldestActiveSnapshot != NULL)
371  {
373 
374  if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
376  }
377 
378  return OldestRegisteredSnapshot;
379 }
380 
381 /*
382  * GetCatalogSnapshot
383  * Get a snapshot that is sufficiently up-to-date for scan of the
384  * system catalog with the specified OID.
385  */
386 Snapshot
388 {
389  /*
390  * Return historic snapshot while we're doing logical decoding, so we can
391  * see the appropriate state of the catalog.
392  *
393  * This is the primary reason for needing to reset the system caches after
394  * finishing decoding.
395  */
397  return HistoricSnapshot;
398 
399  return GetNonHistoricCatalogSnapshot(relid);
400 }
401 
402 /*
403  * GetNonHistoricCatalogSnapshot
404  * Get a snapshot that is sufficiently up-to-date for scan of the system
405  * catalog with the specified OID, even while historic snapshots are set
406  * up.
407  */
408 Snapshot
410 {
411  /*
412  * If the caller is trying to scan a relation that has no syscache, no
413  * catcache invalidations will be sent when it is updated. For a few key
414  * relations, snapshot invalidations are sent instead. If we're trying to
415  * scan a relation for which neither catcache nor snapshot invalidations
416  * are sent, we must refresh the snapshot every time.
417  */
418  if (CatalogSnapshot &&
420  !RelationHasSysCache(relid))
422 
423  if (CatalogSnapshot == NULL)
424  {
425  /* Get new snapshot. */
427 
428  /*
429  * Make sure the catalog snapshot will be accounted for in decisions
430  * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
431  * that would result in making a physical copy, which is overkill; and
432  * it would also create a dependency on some resource owner, which we
433  * do not want for reasons explained at the head of this file. Instead
434  * just shove the CatalogSnapshot into the pairing heap manually. This
435  * has to be reversed in InvalidateCatalogSnapshot, of course.
436  *
437  * NB: it had better be impossible for this to throw error, since the
438  * CatalogSnapshot pointer is already valid.
439  */
441  }
442 
443  return CatalogSnapshot;
444 }
445 
446 /*
447  * InvalidateCatalogSnapshot
448  * Mark the current catalog snapshot, if any, as invalid
449  *
450  * We could change this API to allow the caller to provide more fine-grained
451  * invalidation details, so that a change to relation A wouldn't prevent us
452  * from using our cached snapshot to scan relation B, but so far there's no
453  * evidence that the CPU cycles we spent tracking such fine details would be
454  * well-spent.
455  */
456 void
458 {
459  if (CatalogSnapshot)
460  {
462  CatalogSnapshot = NULL;
464  }
465 }
466 
467 /*
468  * InvalidateCatalogSnapshotConditionally
469  * Drop catalog snapshot if it's the only one we have
470  *
471  * This is called when we are about to wait for client input, so we don't
472  * want to continue holding the catalog snapshot if it might mean that the
473  * global xmin horizon can't advance. However, if there are other snapshots
474  * still active or registered, the catalog snapshot isn't likely to be the
475  * oldest one, so we might as well keep it.
476  */
477 void
479 {
480  if (CatalogSnapshot &&
481  ActiveSnapshot == NULL &&
484 }
485 
486 /*
487  * SnapshotSetCommandId
488  * Propagate CommandCounterIncrement into the static snapshots, if set
489  */
490 void
492 {
493  if (!FirstSnapshotSet)
494  return; /* no transaction snapshot has been set yet; nothing to update */
495 
496  if (CurrentSnapshot)
497  CurrentSnapshot->curcid = curcid; /* updated only while the pointer is set */
498  if (SecondarySnapshot)
499  SecondarySnapshot->curcid = curcid; /* likewise for the secondary snapshot */
500  /* Should we do the same with CatalogSnapshot? */
501 }
502 
503 /*
504  * SetTransactionSnapshot
505  * Set the transaction's snapshot from an imported MVCC snapshot.
506  *
507  * Note that this is very closely tied to GetTransactionSnapshot --- it
508  * must take care of all the same considerations as the first-snapshot case
509  * in GetTransactionSnapshot.
510  */
511 static void
513  int sourcepid, PGPROC *sourceproc)
514 {
515  /* Caller should have checked this already */
517 
518  /* Better do this to ensure following Assert succeeds. */
520 
522  Assert(FirstXactSnapshot == NULL);
524 
525  /*
526  * Even though we are not going to use the snapshot it computes, we must
527  * call GetSnapshotData, for two reasons: (1) to be sure that
528  * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
529  * the state for GlobalVis*.
530  */
532 
533  /*
534  * Now copy appropriate fields from the source snapshot.
535  */
536  CurrentSnapshot->xmin = sourcesnap->xmin;
537  CurrentSnapshot->xmax = sourcesnap->xmax;
538  CurrentSnapshot->xcnt = sourcesnap->xcnt;
539  Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
540  if (sourcesnap->xcnt > 0)
541  memcpy(CurrentSnapshot->xip, sourcesnap->xip,
542  sourcesnap->xcnt * sizeof(TransactionId));
543  CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
544  Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
545  if (sourcesnap->subxcnt > 0)
546  memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
547  sourcesnap->subxcnt * sizeof(TransactionId));
550  /* NB: curcid should NOT be copied, it's a local matter */
551 
553 
554  /*
555  * Now we have to fix what GetSnapshotData did with MyProc->xmin and
556  * TransactionXmin. There is a race condition: to make sure we are not
557  * causing the global xmin to go backwards, we have to test that the
558  * source transaction is still running, and that has to be done
559  * atomically. So let procarray.c do it.
560  *
561  * Note: in serializable mode, predicate.c will do this a second time. It
562  * doesn't seem worth contorting the logic here to avoid two calls,
563  * especially since it's not clear that predicate.c *must* do this.
564  */
565  if (sourceproc != NULL)
566  {
568  ereport(ERROR,
569  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
570  errmsg("could not import the requested snapshot"),
571  errdetail("The source transaction is not running anymore.")));
572  }
573  else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
574  ereport(ERROR,
575  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
576  errmsg("could not import the requested snapshot"),
577  errdetail("The source process with PID %d is not running anymore.",
578  sourcepid)));
579 
580  /*
581  * In transaction-snapshot mode, the first snapshot must live until end of
582  * xact, so we must make a copy of it. Furthermore, if we're running in
583  * serializable mode, predicate.c needs to do its own processing.
584  */
586  {
589  sourcepid);
590  /* Make a saved copy */
593  /* Mark it as "registered" in FirstXactSnapshot */
596  }
597 
598  FirstSnapshotSet = true;
599 }
600 
601 /*
602  * CopySnapshot
603  * Copy the given snapshot.
604  *
605  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
606  * to 0. The returned snapshot has the copied flag set.
607  */
608 static Snapshot
610 {
611  Snapshot newsnap;
612  Size subxipoff;
613  Size size;
614 
615  Assert(snapshot != InvalidSnapshot);
616 
617  /* We allocate any XID arrays needed in the same palloc block. */
618  size = subxipoff = sizeof(SnapshotData) +
619  snapshot->xcnt * sizeof(TransactionId);
620  if (snapshot->subxcnt > 0)
621  size += snapshot->subxcnt * sizeof(TransactionId);
622 
624  memcpy(newsnap, snapshot, sizeof(SnapshotData));
625 
626  newsnap->regd_count = 0;
627  newsnap->active_count = 0;
628  newsnap->copied = true;
629  newsnap->snapXactCompletionCount = 0;
630 
631  /* setup XID array */
632  if (snapshot->xcnt > 0)
633  {
634  newsnap->xip = (TransactionId *) (newsnap + 1);
635  memcpy(newsnap->xip, snapshot->xip,
636  snapshot->xcnt * sizeof(TransactionId));
637  }
638  else
639  newsnap->xip = NULL;
640 
641  /*
642  * Setup subXID array. Don't bother to copy it if it had overflowed,
643  * though, because it's not used anywhere in that case. Except if it's a
644  * snapshot taken during recovery; all the top-level XIDs are in subxip as
645  * well in that case, so we mustn't lose them.
646  */
647  if (snapshot->subxcnt > 0 &&
648  (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
649  {
650  newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
651  memcpy(newsnap->subxip, snapshot->subxip,
652  snapshot->subxcnt * sizeof(TransactionId));
653  }
654  else
655  newsnap->subxip = NULL;
656 
657  return newsnap;
658 }
659 
660 /*
661  * FreeSnapshot
662  * Free the memory associated with a snapshot.
663  */
664 static void
666 {
667  Assert(snapshot->regd_count == 0); /* no registrations may remain */
668  Assert(snapshot->active_count == 0); /* no active-stack element may still reference it */
669  Assert(snapshot->copied); /* only copies (see CopySnapshot) are expected here */
670 
671  pfree(snapshot); /* NOTE(review): CopySnapshot puts the XID arrays in the same block, so presumably one pfree releases everything -- confirm */
672 }
673 
674 /*
675  * PushActiveSnapshot
676  * Set the given snapshot as the current active snapshot
677  *
678  * If the passed snapshot is a statically-allocated one, or it is possibly
679  * subject to a future command counter update, create a new long-lived copy
680  * with active refcount=1. Otherwise, only increment the refcount.
681  */
682 void
684 {
686 }
687 
688 /*
689  * PushActiveSnapshotWithLevel
690  * Set the given snapshot as the current active snapshot
691  *
692  * Same as PushActiveSnapshot except that caller can specify the
693  * transaction nesting level that "owns" the snapshot. This level
694  * must not be deeper than the current top of the snapshot stack.
695  */
696 void
697 PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
698 {
699  ActiveSnapshotElt *newactive;
700 
701  Assert(snapshot != InvalidSnapshot);
702  Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
703 
705 
706  /*
707  * Checking SecondarySnapshot is probably useless here, but it seems
708  * better to be sure.
709  */
710  if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
711  !snapshot->copied)
712  newactive->as_snap = CopySnapshot(snapshot);
713  else
714  newactive->as_snap = snapshot;
715 
716  newactive->as_next = ActiveSnapshot;
717  newactive->as_level = snap_level;
718 
719  newactive->as_snap->active_count++;
720 
721  ActiveSnapshot = newactive;
722  if (OldestActiveSnapshot == NULL)
724 }
725 
726 /*
727  * PushCopiedSnapshot
728  * As above, except forcibly copy the presented snapshot.
729  *
730  * This should be used when the ActiveSnapshot has to be modifiable, for
731  * example if the caller intends to call UpdateActiveSnapshotCommandId.
732  * The new snapshot will be released when popped from the stack.
733  */
734 void
736 {
737  PushActiveSnapshot(CopySnapshot(snapshot)); /* always push a fresh copy, never the caller's snapshot itself */
738 }
739 
740 /*
741  * UpdateActiveSnapshotCommandId
742  *
743  * Update the current CID of the active snapshot. This can only be applied
744  * to a snapshot that is not referenced elsewhere.
745  */
746 void
748 {
749  CommandId save_curcid,
750  curcid;
751 
752  Assert(ActiveSnapshot != NULL);
755 
756  /*
757  * Don't allow modification of the active snapshot during parallel
758  * operation. We share the snapshot to worker backends at the beginning
759  * of parallel operation, so any change to the snapshot can lead to
760  * inconsistencies. We have other defenses against
761  * CommandCounterIncrement, but there are a few places that call this
762  * directly, so we put an additional guard here.
763  */
764  save_curcid = ActiveSnapshot->as_snap->curcid;
765  curcid = GetCurrentCommandId(false);
766  if (IsInParallelMode() && save_curcid != curcid)
767  elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
768  ActiveSnapshot->as_snap->curcid = curcid;
769 }
770 
771 /*
772  * PopActiveSnapshot
773  *
774  * Remove the topmost snapshot from the active snapshot stack, decrementing the
775  * reference count, and free it if this was the last reference.
776  */
777 void
779 {
780  ActiveSnapshotElt *newstack;
781 
782  newstack = ActiveSnapshot->as_next;
783 
785 
787 
788  if (ActiveSnapshot->as_snap->active_count == 0 &&
791 
793  ActiveSnapshot = newstack;
794  if (ActiveSnapshot == NULL)
795  OldestActiveSnapshot = NULL;
796 
798 }
799 
800 /*
801  * GetActiveSnapshot
802  * Return the topmost snapshot in the Active stack.
803  */
804 Snapshot
806 {
807  Assert(ActiveSnapshot != NULL); /* caller must ensure the active stack is non-empty */
808 
809  return ActiveSnapshot->as_snap; /* ActiveSnapshot is the top element of the stack */
810 }
811 
812 /*
813  * ActiveSnapshotSet
814  * Return whether there is at least one snapshot in the Active stack
815  */
816 bool
818 {
819  return ActiveSnapshot != NULL; /* a NULL top-of-stack pointer means the stack is empty */
820 }
821 
822 /*
823  * RegisterSnapshot
824  * Register a snapshot as being in use by the current resource owner
825  *
826  * If InvalidSnapshot is passed, it is not registered.
827  */
828 Snapshot
830 {
831  if (snapshot == InvalidSnapshot)
832  return InvalidSnapshot;
833 
835 }
836 
837 /*
838  * RegisterSnapshotOnOwner
839  * As above, but use the specified resource owner
840  */
841 Snapshot
843 {
844  Snapshot snap;
845 
846  if (snapshot == InvalidSnapshot)
847  return InvalidSnapshot;
848 
849  /* Static snapshot? Create a persistent copy */
850  snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
851 
852  /* and tell resowner.c about it */
854  snap->regd_count++;
855  ResourceOwnerRememberSnapshot(owner, snap);
856 
857  if (snap->regd_count == 1)
859 
860  return snap;
861 }
862 
863 /*
864  * UnregisterSnapshot
865  *
866  * Decrement the reference count of a snapshot, remove the corresponding
867  * reference from CurrentResourceOwner, and free the snapshot if no more
868  * references remain.
869  */
870 void
872 {
873  if (snapshot == NULL)
874  return;
875 
877 }
878 
879 /*
880  * UnregisterSnapshotFromOwner
881  * As above, but use the specified resource owner
882  */
883 void
885 {
886  if (snapshot == NULL)
887  return;
888 
889  Assert(snapshot->regd_count > 0);
891 
892  ResourceOwnerForgetSnapshot(owner, snapshot);
893 
894  snapshot->regd_count--;
895  if (snapshot->regd_count == 0)
897 
898  if (snapshot->regd_count == 0 && snapshot->active_count == 0)
899  {
900  FreeSnapshot(snapshot);
902  }
903 }
904 
905 /*
906  * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
907  * by xmin, so that the snapshot with smallest xmin is at the top.
908  */
909 static int
911 {
912  const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a); /* snapshot embedding heap node a */
913  const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b); /* snapshot embedding heap node b */
914 
915  if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
916  return 1; /* a's xmin is older: positive result, so the smallest xmin is meant to end up on top */
917  else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
918  return -1; /* a's xmin is newer: negative result */
919  else
920  return 0; /* neither precedes nor follows: treated as equal */
921 }
922 
923 /*
924  * SnapshotResetXmin
925  *
926  * If there are no more snapshots, we can reset our PGPROC->xmin to
927  * InvalidTransactionId. Note we can do this without locking because we assume
928  * that storing an Xid is atomic.
929  *
930  * Even if there are some remaining snapshots, we may be able to advance our
931  * PGPROC->xmin to some degree. This typically happens when a portal is
932  * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
933  * the active snapshot stack is empty; this allows us not to need to track
934  * which active snapshot is oldest.
935  *
936  * Note: it's tempting to use GetOldestSnapshot() here so that we can include
937  * active snapshots in the calculation. However, that compares by LSN not
938  * xmin so it's not entirely clear that it's the same thing. Also, we'd be
939  * critically dependent on the assumption that the bottommost active snapshot
940  * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are
941  * not actually critical, but this would be.)
942  */
943 static void
945 {
946  Snapshot minSnapshot;
947 
948  if (ActiveSnapshot != NULL)
949  return;
950 
952  {
954  return;
955  }
956 
957  minSnapshot = pairingheap_container(SnapshotData, ph_node,
959 
960  if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
961  MyProc->xmin = minSnapshot->xmin;
962 }
963 
964 /*
965  * AtSubCommit_Snapshot
966  */
967 void
969 {
970  ActiveSnapshotElt *active;
971 
972  /*
973  * Relabel the active snapshots set in this subtransaction as though they
974  * are owned by the parent subxact.
975  */
976  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
977  {
978  if (active->as_level < level)
979  break; /* stack is in non-increasing as_level order, so no later element can belong to this level */
980  active->as_level = level - 1; /* hand the element over to the parent's nesting level */
981  }
982 }
983 
984 /*
985  * AtSubAbort_Snapshot
986  * Clean up snapshots after a subtransaction abort
987  */
988 void
990 {
991  /* Forget the active snapshots set by this subtransaction */
992  while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
993  {
995 
997 
998  /*
999  * Decrement the snapshot's active count. If it's still registered or
1000  * marked as active by an outer subtransaction, we can't free it yet.
1001  */
1004 
1005  if (ActiveSnapshot->as_snap->active_count == 0 &&
1008 
1009  /* and free the stack element */
1011 
1012  ActiveSnapshot = next;
1013  if (ActiveSnapshot == NULL)
1014  OldestActiveSnapshot = NULL;
1015  }
1016 
1018 }
1019 
1020 /*
1021  * AtEOXact_Snapshot
1022  * Snapshot manager's cleanup function for end of transaction
1023  */
1024 void
1025 AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1026 {
1027  /*
1028  * In transaction-snapshot mode we must release our privately-managed
1029  * reference to the transaction snapshot. We must remove it from
1030  * RegisteredSnapshots to keep the check below happy. But we don't bother
1031  * to do FreeSnapshot, for two reasons: the memory will go away with
1032  * TopTransactionContext anyway, and if someone has left the snapshot
1033  * stacked as active, we don't want the code below to be chasing through a
1034  * dangling pointer.
1035  */
1036  if (FirstXactSnapshot != NULL)
1037  {
1041  }
1042  FirstXactSnapshot = NULL;
1043 
1044  /*
1045  * If we exported any snapshots, clean them up.
1046  */
1047  if (exportedSnapshots != NIL)
1048  {
1049  ListCell *lc;
1050 
1051  /*
1052  * Get rid of the files. Unlink failure is only a WARNING because (1)
1053  * it's too late to abort the transaction, and (2) leaving a leaked
1054  * file around has little real consequence anyway.
1055  *
1056  * We also need to remove the snapshots from RegisteredSnapshots to
1057  * prevent a warning below.
1058  *
1059  * As with the FirstXactSnapshot, we don't need to free resources of
1060  * the snapshot itself as it will go away with the memory context.
1061  */
1062  foreach(lc, exportedSnapshots)
1063  {
1064  ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1065 
1066  if (unlink(esnap->snapfile))
1067  elog(WARNING, "could not unlink file \"%s\": %m",
1068  esnap->snapfile);
1069 
1071  &esnap->snapshot->ph_node);
1072  }
1073 
1075  }
1076 
1077  /* Drop catalog snapshot if any */
1079 
1080  /* On commit, complain about leftover snapshots */
1081  if (isCommit)
1082  {
1083  ActiveSnapshotElt *active;
1084 
1086  elog(WARNING, "registered snapshots seem to remain after cleanup");
1087 
1088  /* complain about unpopped active snapshots */
1089  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1090  elog(WARNING, "snapshot %p still active", active);
1091  }
1092 
1093  /*
1094  * And reset our state. We don't need to free the memory explicitly --
1095  * it'll go away with TopTransactionContext.
1096  */
1097  ActiveSnapshot = NULL;
1098  OldestActiveSnapshot = NULL;
1100 
1101  CurrentSnapshot = NULL;
1102  SecondarySnapshot = NULL;
1103 
1104  FirstSnapshotSet = false;
1105 
1106  /*
1107  * During normal commit processing, we call ProcArrayEndTransaction() to
1108  * reset the MyProc->xmin. That call happens prior to the call to
1109  * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1110  */
1111  if (resetXmin)
1113 
1114  Assert(resetXmin || MyProc->xmin == 0);
1115 }
1116 
1117 
1118 /*
1119  * ExportSnapshot
1120  * Export the snapshot to a file so that other backends can import it.
1121  * Returns the token (the file name) that can be used to import this
1122  * snapshot.
1123  */
1124 char *
1126 {
1127  TransactionId topXid;
1128  TransactionId *children;
1129  ExportedSnapshot *esnap;
1130  int nchildren;
1131  int addTopXid;
1133  FILE *f;
1134  int i;
1135  MemoryContext oldcxt;
1136  char path[MAXPGPATH];
1137  char pathtmp[MAXPGPATH];
1138 
1139  /*
1140  * It's tempting to call RequireTransactionBlock here, since it's not very
1141  * useful to export a snapshot that will disappear immediately afterwards.
1142  * However, we haven't got enough information to do that, since we don't
1143  * know if we're at top level or not. For example, we could be inside a
1144  * plpgsql function that is going to fire off other transactions via
1145  * dblink. Rather than disallow perfectly legitimate usages, don't make a
1146  * check.
1147  *
1148  * Also note that we don't make any restriction on the transaction's
1149  * isolation level; however, importers must check the level if they are
1150  * serializable.
1151  */
1152 
1153  /*
1154  * Get our transaction ID if there is one, to include in the snapshot.
1155  */
1156  topXid = GetTopTransactionIdIfAny();
1157 
1158  /*
1159  * We cannot export a snapshot from a subtransaction because there's no
1160  * easy way for importers to verify that the same subtransaction is still
1161  * running.
1162  */
1163  if (IsSubTransaction())
1164  ereport(ERROR,
1165  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1166  errmsg("cannot export a snapshot from a subtransaction")));
1167 
1168  /*
1169  * We do however allow previous committed subtransactions to exist.
1170  * Importers of the snapshot must see them as still running, so get their
1171  * XIDs to add them to the snapshot.
1172  */
1173  nchildren = xactGetCommittedChildren(&children);
1174 
1175  /*
1176  * Generate file path for the snapshot. We start numbering of snapshots
1177  * inside the transaction from 1.
1178  */
1179  snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1181 
1182  /*
1183  * Copy the snapshot into TopTransactionContext, add it to the
1184  * exportedSnapshots list, and mark it pseudo-registered. We do this to
1185  * ensure that the snapshot's xmin is honored for the rest of the
1186  * transaction.
1187  */
1188  snapshot = CopySnapshot(snapshot);
1189 
1191  esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1192  esnap->snapfile = pstrdup(path);
1193  esnap->snapshot = snapshot;
1195  MemoryContextSwitchTo(oldcxt);
1196 
1197  snapshot->regd_count++;
1199 
1200  /*
1201  * Fill buf with a text serialization of the snapshot, plus identification
1202  * data about this transaction. The format expected by ImportSnapshot is
1203  * pretty rigid: each line must be fieldname:value.
1204  */
1205  initStringInfo(&buf);
1206 
1207  appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid);
1208  appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1209  appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1210  appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1211  appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1212 
1213  appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1214  appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1215 
1216  /*
1217  * We must include our own top transaction ID in the top-xid data, since
1218  * by definition we will still be running when the importing transaction
1219  * adopts the snapshot, but GetSnapshotData never includes our own XID in
1220  * the snapshot. (There must, therefore, be enough room to add it.)
1221  *
1222  * However, it could be that our topXid is after the xmax, in which case
1223  * we shouldn't include it because xip[] members are expected to be before
1224  * xmax. (We need not make the same check for subxip[] members, see
1225  * snapshot.h.)
1226  */
1227  addTopXid = (TransactionIdIsValid(topXid) &&
1228  TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1229  appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1230  for (i = 0; i < snapshot->xcnt; i++)
1231  appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1232  if (addTopXid)
1233  appendStringInfo(&buf, "xip:%u\n", topXid);
1234 
1235  /*
1236  * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1237  * we have to cope with possible overflow.
1238  */
1239  if (snapshot->suboverflowed ||
1240  snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1241  appendStringInfoString(&buf, "sof:1\n");
1242  else
1243  {
1244  appendStringInfoString(&buf, "sof:0\n");
1245  appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1246  for (i = 0; i < snapshot->subxcnt; i++)
1247  appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1248  for (i = 0; i < nchildren; i++)
1249  appendStringInfo(&buf, "sxp:%u\n", children[i]);
1250  }
1251  appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1252 
1253  /*
1254  * Now write the text representation into a file. We first write to a
1255  * ".tmp" filename, and rename to final filename if no error. This
1256  * ensures that no other backend can read an incomplete file
1257  * (ImportSnapshot won't allow it because of its valid-characters check).
1258  */
1259  snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1260  if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1261  ereport(ERROR,
1263  errmsg("could not create file \"%s\": %m", pathtmp)));
1264 
1265  if (fwrite(buf.data, buf.len, 1, f) != 1)
1266  ereport(ERROR,
1268  errmsg("could not write to file \"%s\": %m", pathtmp)));
1269 
1270  /* no fsync() since file need not survive a system crash */
1271 
1272  if (FreeFile(f))
1273  ereport(ERROR,
1275  errmsg("could not write to file \"%s\": %m", pathtmp)));
1276 
1277  /*
1278  * Now that we have written everything into a .tmp file, rename the file
1279  * to remove the .tmp suffix.
1280  */
1281  if (rename(pathtmp, path) < 0)
1282  ereport(ERROR,
1284  errmsg("could not rename file \"%s\" to \"%s\": %m",
1285  pathtmp, path)));
1286 
1287  /*
1288  * The basename of the file is what we return from pg_export_snapshot().
1289  * It's already in path in a textual format and we know that the path
1290  * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1291  * and pstrdup it so as not to return the address of a local variable.
1292  */
1293  return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1294 }
1295 
1296 /*
1297  * pg_export_snapshot
1298  * SQL-callable wrapper for ExportSnapshot.
 *
 * Passes the result of GetActiveSnapshot() to ExportSnapshot() and returns
 * the C string it yields, converted with cstring_to_text(), as a text datum.
 *
 * NOTE(review): the declarator line (function name and argument list,
 * listing line 1301) is absent between "Datum" and "{"; restore it from the
 * upstream file before compiling.
1299  */
1300 Datum
1302 {
1303  char *snapshotName;
1304 
1305  snapshotName = ExportSnapshot(GetActiveSnapshot());
1306  PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1307 }
1308 
1309 
1310 /*
1311  * Parsing subroutines for ImportSnapshot: parse a line with the given
1312  * prefix followed by a value, and advance *s to the next line. The
1313  * filename is provided for use in error messages.
1314  */
1315 static int
1316 parseIntFromText(const char *prefix, char **s, const char *filename)
1317 {
1318  char *ptr = *s;
1319  int prefixlen = strlen(prefix);
1320  int val;
1321 
1322  if (strncmp(ptr, prefix, prefixlen) != 0)
1323  ereport(ERROR,
1324  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1325  errmsg("invalid snapshot data in file \"%s\"", filename)));
1326  ptr += prefixlen;
1327  if (sscanf(ptr, "%d", &val) != 1)
1328  ereport(ERROR,
1329  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1330  errmsg("invalid snapshot data in file \"%s\"", filename)));
1331  ptr = strchr(ptr, '\n');
1332  if (!ptr)
1333  ereport(ERROR,
1334  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1335  errmsg("invalid snapshot data in file \"%s\"", filename)));
1336  *s = ptr + 1;
1337  return val;
1338 }
1339 
/*
 * parseXidFromText
 *		Same line format as parseIntFromText, but the value is scanned with
 *		"%u" and returned as a TransactionId.
 *
 * On success *s is advanced to the character after the line's newline.  A
 * wrong prefix, an unscannable value or a missing newline raises ERROR with
 * "filename" in the message.
 */
1340 static TransactionId
1341 parseXidFromText(const char *prefix, char **s, const char *filename)
1342 {
1343  char *ptr = *s;
1344  int prefixlen = strlen(prefix);
 /*
  * NOTE(review): the declaration of "val" (listing line 1345) is absent
  * here; it is written by sscanf("%u") and returned below, so it is
  * presumably a TransactionId -- confirm against the upstream file.
  */
1346 
 /* the line must start with the expected field name */
1347  if (strncmp(ptr, prefix, prefixlen) != 0)
1348  ereport(ERROR,
1349  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1350  errmsg("invalid snapshot data in file \"%s\"", filename)));
1351  ptr += prefixlen;
 /* the value must scan as an unsigned decimal number */
1352  if (sscanf(ptr, "%u", &val) != 1)
1353  ereport(ERROR,
1354  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1355  errmsg("invalid snapshot data in file \"%s\"", filename)));
 /* the line must be newline-terminated; resume parsing just after it */
1356  ptr = strchr(ptr, '\n');
1357  if (!ptr)
1358  ereport(ERROR,
1359  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1360  errmsg("invalid snapshot data in file \"%s\"", filename)));
1361  *s = ptr + 1;
1362  return val;
1363 }
1364 
1365 static void
1366 parseVxidFromText(const char *prefix, char **s, const char *filename,
1367  VirtualTransactionId *vxid)
1368 {
1369  char *ptr = *s;
1370  int prefixlen = strlen(prefix);
1371 
1372  if (strncmp(ptr, prefix, prefixlen) != 0)
1373  ereport(ERROR,
1374  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1375  errmsg("invalid snapshot data in file \"%s\"", filename)));
1376  ptr += prefixlen;
1377  if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
1378  ereport(ERROR,
1379  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1380  errmsg("invalid snapshot data in file \"%s\"", filename)));
1381  ptr = strchr(ptr, '\n');
1382  if (!ptr)
1383  ereport(ERROR,
1384  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1385  errmsg("invalid snapshot data in file \"%s\"", filename)));
1386  *s = ptr + 1;
1387 }
1388 
1389 /*
1390  * ImportSnapshot
1391  * Import a previously exported snapshot. The argument should be a
1392  * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1393  * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
 *
 * The whole file is read into a palloc'd, NUL-terminated buffer and parsed
 * line by line in a fixed field order: vxid, pid, dbid, iso, ro, xmin, xmax,
 * xcnt, then xcnt "xip" lines, sof, then (only when sof is zero) sxcnt and
 * sxcnt "sxp" lines, and finally rec. A malformed line raises ERROR from
 * the parse helpers. After the sanity and compatibility checks below, the
 * snapshot is passed to SetTransactionSnapshot() together with the source
 * vxid and pid.
 *
 * NOTE(review): three source lines are absent from this listing (the
 * embedded numbering skips 1418, 1428 and 1536); each gap is marked below.
 * Restore them from the upstream file before compiling.
1394  */
1395 void
1396 ImportSnapshot(const char *idstr)
1397 {
1398  char path[MAXPGPATH];
1399  FILE *f;
1400  struct stat stat_buf;
1401  char *filebuf;
1402  int xcnt;
1403  int i;
1404  VirtualTransactionId src_vxid;
1405  int src_pid;
1406  Oid src_dbid;
1407  int src_isolevel;
1408  bool src_readonly;
1409  SnapshotData snapshot;
1410 
1411  /*
1412  * Must be at top level of a fresh transaction. Note in particular that
1413  * we check we haven't acquired an XID --- if we have, it's conceivable
1414  * that the snapshot would show it as not running, making for very screwy
1415  * behavior.
1416  */
1417  if (FirstSnapshotSet ||
 /* NOTE(review): one operand of this condition (listing line 1418) is missing here. */
1419  IsSubTransaction())
1420  ereport(ERROR,
1421  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1422  errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1423 
1424  /*
1425  * If we are in read committed mode then the next query would execute with
1426  * a new snapshot thus making this function call quite useless.
1427  */
 /* NOTE(review): the "if" guarding this ereport (listing line 1428) is missing here. */
1429  ereport(ERROR,
1430  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1431  errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1432 
1433  /*
1434  * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1435  * this mainly to prevent reading arbitrary files.
1436  */
1437  if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1438  ereport(ERROR,
1439  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1440  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1441 
1442  /* OK, read the file */
1443  snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1444 
 /* an unopenable file is reported as an invalid identifier, not as an I/O error */
1445  f = AllocateFile(path, PG_BINARY_R);
1446  if (!f)
1447  ereport(ERROR,
1448  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1449  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1450 
1451  /* get the size of the file so that we know how much memory we need */
1452  if (fstat(fileno(f), &stat_buf))
1453  elog(ERROR, "could not stat file \"%s\": %m", path);
1454 
1455  /* and read the file into a palloc'd string */
1456  filebuf = (char *) palloc(stat_buf.st_size + 1);
 /* the file is read as a single item, so a zero-length file also fails this check */
1457  if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1458  elog(ERROR, "could not read file \"%s\": %m", path);
1459 
 /* NUL-terminate so the string functions in the parse helpers stop at end of data */
1460  filebuf[stat_buf.st_size] = '\0';
1461 
 /* the return value of FreeFile() is not checked here */
1462  FreeFile(f);
1463 
1464  /*
1465  * Construct a snapshot struct by parsing the file content.
 *
 * NOTE(review): the parse helpers advance filebuf itself, so the pointer
 * returned by palloc is no longer held afterwards and the buffer is not
 * pfree'd in this function; presumably it is reclaimed with its memory
 * context -- confirm.
1466  */
1467  memset(&snapshot, 0, sizeof(snapshot));
1468 
1469  parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1470  src_pid = parseIntFromText("pid:", &filebuf, path);
1471  /* we abuse parseXidFromText a bit here ... */
1472  src_dbid = parseXidFromText("dbid:", &filebuf, path);
1473  src_isolevel = parseIntFromText("iso:", &filebuf, path);
1474  src_readonly = parseIntFromText("ro:", &filebuf, path);
1475 
1476  snapshot.snapshot_type = SNAPSHOT_MVCC;
1477 
1478  snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1479  snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1480 
1481  snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1482 
1483  /* sanity-check the xid count before palloc */
1484  if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1485  ereport(ERROR,
1486  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1487  errmsg("invalid snapshot data in file \"%s\"", path)));
1488 
1489  snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1490  for (i = 0; i < xcnt; i++)
1491  snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1492 
1493  snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1494 
 /* the sxcnt/sxp lines are parsed only when the file says sof is zero */
1495  if (!snapshot.suboverflowed)
1496  {
 /* xcnt is reused here as the subxid count */
1497  snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1498 
1499  /* sanity-check the xid count before palloc */
1500  if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1501  ereport(ERROR,
1502  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1503  errmsg("invalid snapshot data in file \"%s\"", path)));
1504 
1505  snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1506  for (i = 0; i < xcnt; i++)
1507  snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1508  }
1509  else
1510  {
1511  snapshot.subxcnt = 0;
1512  snapshot.subxip = NULL;
1513  }
1514 
1515  snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1516 
1517  /*
1518  * Do some additional sanity checking, just to protect ourselves. We
1519  * don't trouble to check the array elements, just the most critical
1520  * fields.
1521  */
1522  if (!VirtualTransactionIdIsValid(src_vxid) ||
1523  !OidIsValid(src_dbid) ||
1524  !TransactionIdIsNormal(snapshot.xmin) ||
1525  !TransactionIdIsNormal(snapshot.xmax))
1526  ereport(ERROR,
1527  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1528  errmsg("invalid snapshot data in file \"%s\"", path)));
1529 
1530  /*
1531  * If we're serializable, the source transaction must be too, otherwise
1532  * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1533  * non-read-only transaction can't adopt a snapshot from a read-only
1534  * transaction, as predicate.c handles the cases very differently.
1535  */
 /*
  * NOTE(review): the "if" that opens this block (listing line 1536) is
  * missing here; per the comment above it presumably tests whether the
  * importing transaction is serializable -- confirm.
  */
1537  {
1538  if (src_isolevel != XACT_SERIALIZABLE)
1539  ereport(ERROR,
1540  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1541  errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1542  if (src_readonly && !XactReadOnly)
1543  ereport(ERROR,
1544  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1545  errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1546  }
1547 
1548  /*
1549  * We cannot import a snapshot that was taken in a different database,
1550  * because vacuum calculates OldestXmin on a per-database basis; so the
1551  * source transaction's xmin doesn't protect us from data loss. This
1552  * restriction could be removed if the source transaction were to mark its
1553  * xmin as being globally applicable. But that would require some
1554  * additional syntax, since that has to be known when the snapshot is
1555  * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1556  */
1557  if (src_dbid != MyDatabaseId)
1558  ereport(ERROR,
1559  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1560  errmsg("cannot import a snapshot from a different database")));
1561 
1562  /* OK, install the snapshot */
1563  SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1564 }
1565 
1566 /*
1567  * XactHasExportedSnapshots
1568  * Test whether current transaction has exported any snapshots.
 *
 * Returns true when the exportedSnapshots list is not NIL.
 *
 * NOTE(review): the declarator line (function name and argument list,
 * listing line 1571) is absent between "bool" and "{"; restore it from the
 * upstream file before compiling.
1569  */
1570 bool
1572 {
1573  return (exportedSnapshots != NIL);
1574 }
1575 
1576 /*
1577  * DeleteAllExportedSnapshotFiles
1578  * Clean up any files that have been left behind by a crashed backend
1579  * that had exported snapshots before it died.
1580  *
1581  * This should be called during database startup or crash recovery.
 *
 * Every directory entry returned for SNAPSHOT_EXPORT_DIR other than "." and
 * ".." is unlinked; a failed unlink is logged at LOG level and the loop
 * continues with the next entry.
 *
 * NOTE(review): three source lines are absent from this listing (the
 * embedded numbering skips 1584, 1595 and 1607): the declarator, the
 * statement that assigns s_dir, and the first argument of the ereport
 * below. Restore them from the upstream file before compiling.
1582  */
1583 void
1585 {
 /* sized to hold SNAPSHOT_EXPORT_DIR, a slash and a file name */
1586  char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1587  DIR *s_dir;
1588  struct dirent *s_de;
1589 
1590  /*
1591  * Problems in reading the directory, or unlinking files, are reported at
1592  * LOG level. Since we're running in the startup process, ERROR level
1593  * would prevent database start, and it's not important enough for that.
1594  */
 /* NOTE(review): s_dir is read below but its assignment (listing line 1595) is missing here. */
1596 
1597  while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1598  {
 /* skip the directory's self and parent entries */
1599  if (strcmp(s_de->d_name, ".") == 0 ||
1600  strcmp(s_de->d_name, "..") == 0)
1601  continue;
1602 
1603  snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1604 
1605  if (unlink(buf) != 0)
1606  ereport(LOG,
1608  errmsg("could not remove file \"%s\": %m", buf)));
1609  }
1610 
1611  FreeDir(s_dir);
1612 }
1613 
1614 /*
1615  * ThereAreNoPriorRegisteredSnapshots
1616  * Is the registered snapshot count less than or equal to one?
1617  *
1618  * Don't use this to settle important decisions. While zero registrations and
1619  * no ActiveSnapshot would confirm a certain idleness, the system makes no
1620  * guarantees about the significance of one registered snapshot.
 *
 * NOTE(review): the declarator (listing line 1623) and the condition that
 * guards "return true" (listing lines 1625-1626) are absent from this
 * listing. As shown, the second return is unreachable; restore the missing
 * lines from the upstream file before compiling.
1621  */
1622 bool
1624 {
1627  return true;
1628 
1629  return false;
1630 }
1631 
1632 /*
1633  * HaveRegisteredOrActiveSnapshots
1634  * Is there any registered or active snapshot?
1635  *
1636  * NB: Unless pushed or active, the cached catalog snapshot will not cause
1637  * this function to return true. That allows this function to be used in
1638  * checks enforcing a longer-lived snapshot.
 *
 * NOTE(review): three source lines are absent from this listing (the
 * embedded numbering skips 1641, 1652 and 1655): the declarator, the second
 * half of the CatalogSnapshot condition, and the final return statement.
 * Restore them from the upstream file before compiling.
1639  */
1640 bool
1642 {
 /* any entry on the active-snapshot stack answers the question immediately */
1643  if (ActiveSnapshot != NULL)
1644  return true;
1645 
1646  /*
1647  * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1648  * removed at any time due to invalidation processing. So if a snapshot
1649  * has been explicitly registered, RegisteredSnapshots must hold more than one entry.
1650  */
1651  if (CatalogSnapshot != NULL &&
 /* NOTE(review): the rest of this condition (listing line 1652) is missing here. */
1653  return false;
1654 
 /* NOTE(review): the final return statement (listing line 1655) is missing here. */
1656 }
1657 
1658 
1659 /*
1660  * Return a timestamp that is exactly on a minute boundary.
1661  *
1662  * If the argument is already aligned, return that value, otherwise move to
1663  * the next minute boundary following the given time.
1664  */
1665 static TimestampTz
1667 {
1668  TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
1669 
1670  return retval - (retval % USECS_PER_MINUTE);
1671 }
1672 
1673 /*
1674  * Get current timestamp for snapshots
1675  *
1676  * This is basically GetCurrentTimestamp(), but with a guarantee that
1677  * the result never moves backward.
1678  */
1681 {
1683 
1684  /*
1685  * Don't let time move backward; if it hasn't advanced, use the old value.
1686  */
1688  if (now <= oldSnapshotControl->current_timestamp)
1690  else
1693 
1694  return now;
1695 }
1696 
1697 /*
1698  * Get timestamp through which vacuum may have processed based on last stored
1699  * value for threshold_timestamp.
1700  *
1701  * XXX: So far, we never trust that a 64-bit value can be read atomically; if
1702  * that ever changes, we could get rid of the spinlock here.
1703  */
1706 {
1707  TimestampTz threshold_timestamp;
1708 
1710  threshold_timestamp = oldSnapshotControl->threshold_timestamp;
1712 
1713  return threshold_timestamp;
1714 }
1715 
1716 void
1718 {
1725 }
1726 
1727 /*
1728  * XXX: Magic to keep old_snapshot_threshold tests appearing to be "working". They
1729  * currently are broken, and discussion of what to do about them is
1730  * ongoing. See
1731  * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
1732  */
1733 void
1735 {
1737 
1739 
1740  ts -= 5 * USECS_PER_SEC;
1741 
1745 }
1746 
1747 /*
1748  * If there is a valid mapping for the timestamp, set *xlimitp to
1749  * that. Returns whether there is such a mapping.
1750  */
1751 static bool
1753 {
1754  bool in_mapping = false;
1755 
1757 
1758  LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
1759 
1762  {
1763  int offset;
1764 
1765  offset = ((ts - oldSnapshotControl->head_timestamp)
1766  / USECS_PER_MINUTE);
1767  if (offset > oldSnapshotControl->count_used - 1)
1768  offset = oldSnapshotControl->count_used - 1;
1769  offset = (oldSnapshotControl->head_offset + offset)
1771 
1772  *xlimitp = oldSnapshotControl->xid_by_minute[offset];
1773 
1774  in_mapping = true;
1775  }
1776 
1777  LWLockRelease(OldSnapshotTimeMapLock);
1778 
1779  return in_mapping;
1780 }
1781 
1782 /*
1783  * TransactionIdLimitedForOldSnapshots
1784  *
1785  * Apply old snapshot limit. This is intended to be called for page pruning
1786  * and table vacuuming, to allow old_snapshot_threshold to override the normal
1787  * global xmin value. Actual testing for snapshot too old will be based on
1788  * whether a snapshot timestamp is prior to the threshold timestamp set in
1789  * this function.
1790  *
1791  * If the limited horizon allows a cleanup action that otherwise would not be
1792  * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
1793  * be called before that cleanup action.
 *
 * Returns true, with *limit_xid and *limit_ts filled in, only when the
 * computed limit is a valid XID that follows or equals recentXmin.
 * Otherwise it returns false and leaves both outputs untouched. It also
 * returns false right away for relations that do not allow early pruning or
 * do not need WAL.
 *
 * NOTE(review): several source lines are absent from this listing (the
 * embedded numbering skips 1796, 1809, 1819, 1821, 1824, 1844-1845, 1848 and
 * 1851), including the declarator's first line and every statement that
 * assigns "ts". Restore them from the upstream file before compiling.
1794  */
1795 bool
1797  Relation relation,
1798  TransactionId *limit_xid,
1799  TimestampTz *limit_ts)
1800 {
1801  TimestampTz ts;
1802  TransactionId xlimit = recentXmin;
1803  TransactionId latest_xmin;
1804  TimestampTz next_map_update_ts;
 /*
  * NOTE(review): threshold_timestamp is declared as TransactionId, yet it
  * is loaded from oldSnapshotControl->threshold_timestamp and compared with
  * "ts", which is a TimestampTz. If TransactionId is narrower than
  * TimestampTz, the value is truncated and the "ts == threshold_timestamp"
  * fast path below can effectively never match. This looks like it should
  * be TimestampTz -- confirm and fix.
  */
1805  TransactionId threshold_timestamp;
1806  TransactionId threshold_xid;
1807 
1808  Assert(TransactionIdIsNormal(recentXmin));
1810  Assert(limit_ts != NULL && limit_xid != NULL);
1811 
1812  /*
1813  * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
1814  * can't prune early when skipping WAL.
1815  */
1816  if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
1817  return false;
1818 
1820 
 /* NOTE(review): the lines surrounding these two reads (1819, 1821, 1824) are missing here. */
1822  latest_xmin = oldSnapshotControl->latest_xmin;
1823  next_map_update_ts = oldSnapshotControl->next_map_update;
1825 
1826  /*
1827  * Zero threshold always overrides to latest xmin, if valid. Without some
1828  * heuristic it will find its own snapshot too old on, for example, a
1829  * simple UPDATE -- which would make it useless for most testing, but
1830  * there is no principled way to ensure that it doesn't fail in this way.
1831  * Use a five-second delay to try to get useful testing behavior, but this
1832  * may need adjustment.
1833  */
1834  if (old_snapshot_threshold == 0)
1835  {
 /* adopt latest_xmin only if it lies strictly between xlimit and our own xmin */
1836  if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
1837  && TransactionIdFollows(latest_xmin, xlimit))
1838  xlimit = latest_xmin;
1839 
1840  ts -= 5 * USECS_PER_SEC;
1841  }
1842  else
1843  {
 /* NOTE(review): the statement on listing lines 1844-1845 is missing here. */
1846 
1847  /* Check for fast exit without LW locking. */
 /* NOTE(review): the lines surrounding these two reads (1848, 1851) are missing here. */
1849  threshold_timestamp = oldSnapshotControl->threshold_timestamp;
1850  threshold_xid = oldSnapshotControl->threshold_xid;
1852 
1853  if (ts == threshold_timestamp)
1854  {
1855  /*
1856  * Current timestamp is in same bucket as the last limit that was
1857  * applied. Reuse.
1858  */
1859  xlimit = threshold_xid;
1860  }
1861  else if (ts == next_map_update_ts)
1862  {
1863  /*
1864  * FIXME: This branch is super iffy - but that should probably be
1865  * fixed separately.
1866  */
1867  xlimit = latest_xmin;
1868  }
1869  else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
1870  {
 /* nothing more to do: the call above already stored the mapped XID in xlimit */
1871  }
1872 
1873  /*
1874  * Failsafe protection against vacuuming work of active transaction.
1875  *
1876  * This is not an assertion because we avoid the spinlock for
1877  * performance, leaving open the possibility that xlimit could advance
1878  * and be more current; but it seems prudent to apply this limit. It
1879  * might make pruning a tiny bit less aggressive than it could be, but
1880  * protects against data loss bugs.
1881  */
1882  if (TransactionIdIsNormal(latest_xmin)
1883  && TransactionIdPrecedes(latest_xmin, xlimit))
1884  xlimit = latest_xmin;
1885  }
1886 
 /* report a limit only when it is at least as new as the caller's recentXmin */
1887  if (TransactionIdIsValid(xlimit) &&
1888  TransactionIdFollowsOrEquals(xlimit, recentXmin))
1889  {
1890  *limit_ts = ts;
1891  *limit_xid = xlimit;
1892 
1893  return true;
1894  }
1895 
1896  return false;
1897 }
1898 
1899 /*
1900  * Take care of the circular buffer that maps time to xid.
1901  */
1902 void
1904 {
1905  TimestampTz ts;
1906  TransactionId latest_xmin;
1907  TimestampTz update_ts;
1908  bool map_update_required = false;
1909 
1910  /* Never call this function when old snapshot checking is disabled. */
1912 
1913  ts = AlignTimestampToMinuteBoundary(whenTaken);
1914 
1915  /*
1916  * Keep track of the latest xmin seen by any process. Update mapping with
1917  * a new value when we have crossed a bucket boundary.
1918  */
1920  latest_xmin = oldSnapshotControl->latest_xmin;
1921  update_ts = oldSnapshotControl->next_map_update;
1922  if (ts > update_ts)
1923  {
1925  map_update_required = true;
1926  }
1927  if (TransactionIdFollows(xmin, latest_xmin))
1930 
1931  /* We only needed to update the most recent xmin value. */
1932  if (!map_update_required)
1933  return;
1934 
1935  /* No further tracking needed for 0 (used for testing). */
1936  if (old_snapshot_threshold == 0)
1937  return;
1938 
1939  /*
1940  * We don't want to do something stupid with unusual values, but we don't
1941  * want to litter the log with warnings or break otherwise normal
1942  * processing for this feature; so if something seems unreasonable, just
1943  * log at DEBUG level and return without doing anything.
1944  */
1945  if (whenTaken < 0)
1946  {
1947  elog(DEBUG1,
1948  "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
1949  (long) whenTaken);
1950  return;
1951  }
1952  if (!TransactionIdIsNormal(xmin))
1953  {
1954  elog(DEBUG1,
1955  "MaintainOldSnapshotTimeMapping called with xmin = %lu",
1956  (unsigned long) xmin);
1957  return;
1958  }
1959 
1960  LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
1961 
1967 
1968  if (oldSnapshotControl->count_used == 0)
1969  {
1970  /* set up first entry for empty mapping */
1974  oldSnapshotControl->xid_by_minute[0] = xmin;
1975  }
1976  else if (ts < oldSnapshotControl->head_timestamp)
1977  {
1978  /* old ts; log it at DEBUG */
1979  LWLockRelease(OldSnapshotTimeMapLock);
1980  elog(DEBUG1,
1981  "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
1982  (long) whenTaken);
1983  return;
1984  }
1985  else if (ts <= (oldSnapshotControl->head_timestamp +
1987  * USECS_PER_MINUTE)))
1988  {
1989  /* existing mapping; advance xid if possible */
1990  int bucket = (oldSnapshotControl->head_offset
1992  / USECS_PER_MINUTE))
1994 
1996  oldSnapshotControl->xid_by_minute[bucket] = xmin;
1997  }
1998  else
1999  {
2000  /* We need a new bucket, but it might not be the very next one. */
2001  int distance_to_new_tail;
2002  int distance_to_current_tail;
2003  int advance;
2004 
2005  /*
2006  * Our goal is for the new "tail" of the mapping, that is, the entry
2007  * which is newest and thus furthest from the "head" entry, to
2008  * correspond to "ts". Since there's one entry per minute, the
2009  * distance between the current head and the new tail is just the
2010  * number of minutes of difference between ts and the current
2011  * head_timestamp.
2012  *
2013  * The distance from the current head to the current tail is one less
2014  * than the number of entries in the mapping, because the entry at the
2015  * head_offset is for 0 minutes after head_timestamp.
2016  *
2017  * The difference between these two values is the number of minutes by
2018  * which we need to advance the mapping, either adding new entries or
2019  * rotating old ones out.
2020  */
2021  distance_to_new_tail =
2023  distance_to_current_tail =
2025  advance = distance_to_new_tail - distance_to_current_tail;
2026  Assert(advance > 0);
2027 
2028  if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
2029  {
2030  /* Advance is so far that all old data is junk; start over. */
2033  oldSnapshotControl->xid_by_minute[0] = xmin;
2035  }
2036  else
2037  {
2038  /* Store the new value in one or more buckets. */
2039  int i;
2040 
2041  for (i = 0; i < advance; i++)
2042  {
2044  {
2045  /* Map full and new value replaces old head. */
2046  int old_head = oldSnapshotControl->head_offset;
2047 
2048  if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
2050  else
2051  oldSnapshotControl->head_offset = old_head + 1;
2052  oldSnapshotControl->xid_by_minute[old_head] = xmin;
2054  }
2055  else
2056  {
2057  /* Extend map to unused entry. */
2058  int new_tail = (oldSnapshotControl->head_offset
2061 
2063  oldSnapshotControl->xid_by_minute[new_tail] = xmin;
2064  }
2065  }
2066  }
2067  }
2068 
2069  LWLockRelease(OldSnapshotTimeMapLock);
2070 }
2071 
2072 
2073 /*
2074  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
2075  * access to behave just like it did at a certain point in the past.
2076  *
2077  * Needed for logical decoding.
2078  */
2079 void
2080 SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
2081 {
2082  Assert(historic_snapshot != NULL);
2083 
2084  /* setup the timetravel snapshot */
2085  HistoricSnapshot = historic_snapshot;
2086 
2087  /* setup (cmin, cmax) lookup hash */
2088  tuplecid_data = tuplecids;
2089 }
2090 
2091 
2092 /*
2093  * Make catalog snapshots behave normally again.
2094  */
2095 void
2097 {
2098  HistoricSnapshot = NULL;
2099  tuplecid_data = NULL;
2100 }
2101 
2102 bool
2104 {
2105  return HistoricSnapshot != NULL;
2106 }
2107 
2108 HTAB *
2110 {
2112  return tuplecid_data;
2113 }
2114 
2115 /*
2116  * EstimateSnapshotSpace
2117  * Returns the size needed to store the given snapshot.
2118  *
2119  * We are exporting only required fields from the Snapshot, stored in
2120  * SerializedSnapshotData.
 *
 * The result is sizeof(SerializedSnapshotData) plus room for xcnt XIDs,
 * plus room for subxcnt XIDs when the sub-XID array is non-empty and either
 * not overflowed or taken during recovery. Only SNAPSHOT_MVCC snapshots
 * are accepted (asserted).
 *
 * NOTE(review): the declarator line (function name and argument list,
 * listing line 2123) is absent between "Size" and "{"; restore it from the
 * upstream file before compiling.
2121  */
2122 Size
2124 {
2125  Size size;
2126 
2127  Assert(snapshot != InvalidSnapshot);
2128  Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
2129 
2130  /* We allocate any XID arrays needed in the same palloc block. */
2131  size = add_size(sizeof(SerializedSnapshotData),
2132  mul_size(snapshot->xcnt, sizeof(TransactionId)));
 /* count sub-XIDs only in the cases where they are kept: not overflowed, or taken during recovery */
2133  if (snapshot->subxcnt > 0 &&
2134  (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
2135  size = add_size(size,
2136  mul_size(snapshot->subxcnt, sizeof(TransactionId)));
2137 
2138  return size;
2139 }
2140 
2141 /*
2142  * SerializeSnapshot
2143  * Dumps the serialized snapshot (extracted from given snapshot) onto the
2144  * memory location at start_address.
2145  */
2146 void
2147 SerializeSnapshot(Snapshot snapshot, char *start_address)
2148 {
2149  SerializedSnapshotData serialized_snapshot;
2150 
2151  Assert(snapshot->subxcnt >= 0);
2152 
2153  /* Copy all required fields */
2154  serialized_snapshot.xmin = snapshot->xmin;
2155  serialized_snapshot.xmax = snapshot->xmax;
2156  serialized_snapshot.xcnt = snapshot->xcnt;
2157  serialized_snapshot.subxcnt = snapshot->subxcnt;
2158  serialized_snapshot.suboverflowed = snapshot->suboverflowed;
2159  serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
2160  serialized_snapshot.curcid = snapshot->curcid;
2161  serialized_snapshot.whenTaken = snapshot->whenTaken;
2162  serialized_snapshot.lsn = snapshot->lsn;
2163 
2164  /*
2165  * Ignore the SubXID array if it has overflowed, unless the snapshot was
2166  * taken during recovery - in that case, top-level XIDs are in subxip as
2167  * well, and we mustn't lose them.
2168  */
2169  if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
2170  serialized_snapshot.subxcnt = 0;
2171 
2172  /* Copy struct to possibly-unaligned buffer */
2173  memcpy(start_address,
2174  &serialized_snapshot, sizeof(SerializedSnapshotData));
2175 
2176  /* Copy XID array */
2177  if (snapshot->xcnt > 0)
2178  memcpy((TransactionId *) (start_address +
2179  sizeof(SerializedSnapshotData)),
2180  snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
2181 
2182  /*
2183  * Copy SubXID array. Don't bother to copy it if it had overflowed,
2184  * though, because it's not used anywhere in that case. Except if it's a
2185  * snapshot taken during recovery; all the top-level XIDs are in subxip as
2186  * well in that case, so we mustn't lose them.
2187  */
2188  if (serialized_snapshot.subxcnt > 0)
2189  {
2190  Size subxipoff = sizeof(SerializedSnapshotData) +
2191  snapshot->xcnt * sizeof(TransactionId);
2192 
2193  memcpy((TransactionId *) (start_address + subxipoff),
2194  snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
2195  }
2196 }
2197 
2198 /*
2199  * RestoreSnapshot
2200  * Restore a serialized snapshot from the specified address.
2201  *
2202  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
2203  * to 0. The returned snapshot has the copied flag set.
 *
 * The input is read as a SerializedSnapshotData header followed by xcnt
 * top-level XIDs and then subxcnt sub-XIDs. The XID arrays of the result
 * are placed directly after the SnapshotData struct, in the same block.
 *
 * NOTE(review): the statement that allocates and assigns "snapshot" (listing
 * line 2224) is absent from this listing; restore it from the upstream file
 * before compiling.
2204  */
2205 Snapshot
2206 RestoreSnapshot(char *start_address)
2207 {
2208  SerializedSnapshotData serialized_snapshot;
2209  Size size;
2210  Snapshot snapshot;
2211  TransactionId *serialized_xids;
2212 
 /* copy the header out with memcpy; the source buffer may be unaligned */
2213  memcpy(&serialized_snapshot, start_address,
2214  sizeof(SerializedSnapshotData));
2215  serialized_xids = (TransactionId *)
2216  (start_address + sizeof(SerializedSnapshotData));
2217 
2218  /* We allocate any XID arrays needed in the same palloc block. */
 /*
  * NOTE(review): this sum uses plain arithmetic, whereas
  * EstimateSnapshotSpace uses add_size/mul_size; the counts come from the
  * serialized header and are not range-checked here.
  */
2219  size = sizeof(SnapshotData)
2220  + serialized_snapshot.xcnt * sizeof(TransactionId)
2221  + serialized_snapshot.subxcnt * sizeof(TransactionId);
2222 
2223  /* Copy all required fields */
 /* NOTE(review): "snapshot" is dereferenced below but its assignment (listing line 2224) is missing here. */
2225  snapshot->snapshot_type = SNAPSHOT_MVCC;
2226  snapshot->xmin = serialized_snapshot.xmin;
2227  snapshot->xmax = serialized_snapshot.xmax;
2228  snapshot->xip = NULL;
2229  snapshot->xcnt = serialized_snapshot.xcnt;
2230  snapshot->subxip = NULL;
2231  snapshot->subxcnt = serialized_snapshot.subxcnt;
2232  snapshot->suboverflowed = serialized_snapshot.suboverflowed;
2233  snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
2234  snapshot->curcid = serialized_snapshot.curcid;
2235  snapshot->whenTaken = serialized_snapshot.whenTaken;
2236  snapshot->lsn = serialized_snapshot.lsn;
2237  snapshot->snapXactCompletionCount = 0;
2238 
2239  /* Copy XIDs, if present. */
2240  if (serialized_snapshot.xcnt > 0)
2241  {
 /* the xip array starts immediately after the SnapshotData struct */
2242  snapshot->xip = (TransactionId *) (snapshot + 1);
2243  memcpy(snapshot->xip, serialized_xids,
2244  serialized_snapshot.xcnt * sizeof(TransactionId));
2245  }
2246 
2247  /* Copy SubXIDs, if present. */
2248  if (serialized_snapshot.subxcnt > 0)
2249  {
 /* subxip follows the xcnt top-level XIDs, in both the source and the copy */
2250  snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
2251  serialized_snapshot.xcnt;
2252  memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
2253  serialized_snapshot.subxcnt * sizeof(TransactionId));
2254  }
2255 
2256  /* Set the copied flag so that the caller will set refcounts correctly. */
2257  snapshot->regd_count = 0;
2258  snapshot->active_count = 0;
2259  snapshot->copied = true;
2260 
2261  return snapshot;
2262 }
2263 
2264 /*
2265  * Install a restored snapshot as the transaction snapshot.
2266  *
2267  * The second argument is of type void * so that snapmgr.h need not include
2268  * the declaration for PGPROC.
2269  */
2270 void
2271 RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
2272 {
2273  SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
2274 }
2275 
2276 /*
2277  * XidInMVCCSnapshot
2278  * Is the given XID still-in-progress according to the snapshot?
2279  *
2280  * Note: GetSnapshotData never stores either top xid or subxids of our own
2281  * backend into a snapshot, so these xids will not be reported as "running"
2282  * by this function. This is OK for current uses, because we always check
2283  * TransactionIdIsCurrentTransactionId first, except when it's known the
2284  * XID could not be ours anyway.
2285  */
2286 bool
2288 {
2289  /*
2290  * Make a quick range check to eliminate most XIDs without looking at the
2291  * xip arrays. Note that this is OK even if we convert a subxact XID to
2292  * its parent below, because a subxact with XID < xmin has surely also got
2293  * a parent with XID < xmin, while one with XID >= xmax must belong to a
2294  * parent that was not yet committed at the time of this snapshot.
2295  */
2296 
2297  /* Any xid < xmin is not in-progress */
2298  if (TransactionIdPrecedes(xid, snapshot->xmin))
2299  return false;
2300  /* Any xid >= xmax is in-progress */
2301  if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
2302  return true;
2303 
2304  /*
2305  * Snapshot information is stored slightly differently in snapshots taken
2306  * during recovery.
2307  */
2308  if (!snapshot->takenDuringRecovery)
2309  {
2310  /*
2311  * If the snapshot contains full subxact data, the fastest way to
2312  * check things is just to compare the given XID against both subxact
2313  * XIDs and top-level XIDs. If the snapshot overflowed, we have to
2314  * use pg_subtrans to convert a subxact XID to its parent XID, but
2315  * then we need only look at top-level XIDs not subxacts.
2316  */
2317  if (!snapshot->suboverflowed)
2318  {
2319  /* we have full data, so search subxip */
2320  if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
2321  return true;
2322 
2323  /* not there, fall through to search xip[] */
2324  }
2325  else
2326  {
2327  /*
2328  * Snapshot overflowed, so convert xid to top-level. This is safe
2329  * because we eliminated too-old XIDs above.
2330  */
2331  xid = SubTransGetTopmostTransaction(xid);
2332 
2333  /*
2334  * If xid was indeed a subxact, we might now have an xid < xmin,
2335  * so recheck to avoid an array scan. No point in rechecking
2336  * xmax.
2337  */
2338  if (TransactionIdPrecedes(xid, snapshot->xmin))
2339  return false;
2340  }
2341 
2342  if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
2343  return true;
2344  }
2345  else
2346  {
2347  /*
2348  * In recovery we store all xids in the subxip array because it is by
2349  * far the bigger array, and we mostly don't know which xids are
2350  * top-level and which are subxacts. The xip array is empty.
2351  *
2352  * We start by searching subtrans, if we overflowed.
2353  */
2354  if (snapshot->suboverflowed)
2355  {
2356  /*
2357  * Snapshot overflowed, so convert xid to top-level. This is safe
2358  * because we eliminated too-old XIDs above.
2359  */
2360  xid = SubTransGetTopmostTransaction(xid);
2361 
2362  /*
2363  * If xid was indeed a subxact, we might now have an xid < xmin,
2364  * so recheck to avoid an array scan. No point in rechecking
2365  * xmax.
2366  */
2367  if (TransactionIdPrecedes(xid, snapshot->xmin))
2368  return false;
2369  }
2370 
2371  /*
2372  * We now have either a top-level xid higher than xmin or an
2373  * indeterminate xid. We don't know whether it's top level or subxact
2374  * but it doesn't matter. If it's present, the xid is visible.
2375  */
2376  if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
2377  return true;
2378  }
2379 
2380  return false;
2381 }
Datum current_timestamp(PG_FUNCTION_ARGS)
Definition: timestamp.c:1600
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1582
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1546
static int32 next
Definition: blutils.c:219
unsigned int uint32
Definition: c.h:490
#define PG_BINARY_R
Definition: c.h:1262
signed int int32
Definition: c.h:478
#define PG_BINARY_W
Definition: c.h:1263
uint32 CommandId
Definition: c.h:650
uint32 TransactionId
Definition: c.h:636
#define OidIsValid(objectId)
Definition: c.h:759
size_t Size
Definition: c.h:589
int64 TimestampTz
Definition: timestamp.h:39
#define USECS_PER_SEC
Definition: timestamp.h:133
#define USECS_PER_MINUTE
Definition: timestamp.h:132
int errcode_for_file_access(void)
Definition: elog.c:881
int errdetail(const char *fmt,...)
Definition: elog.c:1202
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define LOG
Definition: elog.h:31
#define WARNING
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int FreeDir(DIR *dir)
Definition: fd.c:2762
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2384
int FreeFile(FILE *file)
Definition: fd.c:2582
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition: fd.c:2725
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2644
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int MyProcPid
Definition: globals.c:44
Oid MyDatabaseId
Definition: globals.c:89
long val
Definition: informix.c:664
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
#define VirtualTransactionIdIsValid(vxid)
Definition: lock.h:67
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
@ LW_SHARED
Definition: lwlock.h:116
@ LW_EXCLUSIVE
Definition: lwlock.h:115
MemoryContext TopTransactionContext
Definition: mcxt.c:146
char * pstrdup(const char *in)
Definition: mcxt.c:1624
void pfree(void *pointer)
Definition: mcxt.c:1436
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1005
void * palloc(Size size)
Definition: mcxt.c:1210
#define InvalidPid
Definition: miscadmin.h:32
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:170
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:112
pairingheap_node * pairingheap_first(pairingheap *heap)
Definition: pairingheap.c:130
#define pairingheap_is_empty(h)
Definition: pairingheap.h:96
#define pairingheap_is_singular(h)
Definition: pairingheap.h:99
#define pairingheap_container(type, membername, ptr)
Definition: pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition: pairingheap.h:51
#define pairingheap_reset(h)
Definition: pairingheap.h:93
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
void * arg
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:119
static bool pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
Definition: pg_lfind.h:90
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
static char * buf
Definition: pg_test_fsync.c:67
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
void SetSerializableTransactionSnapshot(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition: predicate.c:1656
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition: predicate.c:1616
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:2109
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:2235
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2098
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:2679
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition: procarray.c:2600
#define RelationNeedsWAL(relation)
Definition: rel.h:628
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1255
void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1264
void ResourceOwnerEnlargeSnapshots(ResourceOwner owner)
Definition: resowner.c:1244
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
static Snapshot HistoricSnapshot
Definition: snapmgr.c:106
void SnapshotTooOldMagicForTest(void)
Definition: snapmgr.c:1734
static Snapshot FirstXactSnapshot
Definition: snapmgr.c:157
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1903
TransactionId RecentXmin
Definition: snapmgr.c:114
SnapshotData CatalogSnapshotData
Definition: snapmgr.c:98
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:884
static void SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, int sourcepid, PGPROC *sourceproc)
Definition: snapmgr.c:512
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1680
void AtSubAbort_Snapshot(int level)
Definition: snapmgr.c:989
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2147
void SnapMgrInit(void)
Definition: snapmgr.c:214
SnapshotData SnapshotSelfData
Definition: snapmgr.c:99
void AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Definition: snapmgr.c:1025
struct ActiveSnapshotElt ActiveSnapshotElt
static Snapshot CurrentSnapshot
Definition: snapmgr.c:103
static Snapshot SecondarySnapshot
Definition: snapmgr.c:104
bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
Definition: snapmgr.c:2287
static List * exportedSnapshots
Definition: snapmgr.c:170
static pairingheap RegisteredSnapshots
Definition: snapmgr.c:147
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts)
Definition: snapmgr.c:1666
bool FirstSnapshotSet
Definition: snapmgr.c:150
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:251
Snapshot GetLatestSnapshot(void)
Definition: snapmgr.c:326
void TeardownHistoricSnapshot(bool is_error)
Definition: snapmgr.c:2096
Snapshot GetCatalogSnapshot(Oid relid)
Definition: snapmgr.c:387
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:871
void PushActiveSnapshot(Snapshot snapshot)
Definition: snapmgr.c:683
static Snapshot CopySnapshot(Snapshot snapshot)
Definition: snapmgr.c:609
static ActiveSnapshotElt * OldestActiveSnapshot
Definition: snapmgr.c:138
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2206
static bool GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
Definition: snapmgr.c:1752
HTAB * HistoricSnapshotGetTupleCids(void)
Definition: snapmgr.c:2109
void AtSubCommit_Snapshot(int level)
Definition: snapmgr.c:968
void UpdateActiveSnapshotCommandId(void)
Definition: snapmgr.c:747
static void SnapshotResetXmin(void)
Definition: snapmgr.c:944
static int parseIntFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1316
static SnapshotData SecondarySnapshotData
Definition: snapmgr.c:97
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1125
static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
Definition: snapmgr.c:910
TransactionId TransactionXmin
Definition: snapmgr.c:113
SnapshotData SnapshotAnyData
Definition: snapmgr.c:100
bool HistoricSnapshotActive(void)
Definition: snapmgr.c:2103
void ImportSnapshot(const char *idstr)
Definition: snapmgr.c:1396
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:817
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:829
bool XactHasExportedSnapshots(void)
Definition: snapmgr.c:1571
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1584
static void parseVxidFromText(const char *prefix, char **s, const char *filename, VirtualTransactionId *vxid)
Definition: snapmgr.c:1366
static void FreeSnapshot(Snapshot snapshot)
Definition: snapmgr.c:665
#define SNAPSHOT_EXPORT_DIR
Definition: snapmgr.c:160
bool HaveRegisteredOrActiveSnapshot(void)
Definition: snapmgr.c:1641
void InvalidateCatalogSnapshotConditionally(void)
Definition: snapmgr.c:478
static SnapshotData CurrentSnapshotData
Definition: snapmgr.c:96
bool ThereAreNoPriorRegisteredSnapshots(void)
Definition: snapmgr.c:1623
void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
Definition: snapmgr.c:2271
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1796
void SnapshotSetCommandId(CommandId curcid)
Definition: snapmgr.c:491
struct SerializedSnapshotData SerializedSnapshotData
void PopActiveSnapshot(void)
Definition: snapmgr.c:778
void PushCopiedSnapshot(Snapshot snapshot)
Definition: snapmgr.c:735
Size EstimateSnapshotSpace(Snapshot snapshot)
Definition: snapmgr.c:2123
static ActiveSnapshotElt * ActiveSnapshot
Definition: snapmgr.c:135
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition: snapmgr.c:2080
TimestampTz GetOldSnapshotThresholdTimestamp(void)
Definition: snapmgr.c:1705
int old_snapshot_threshold
Definition: snapmgr.c:79
static HTAB * tuplecid_data
Definition: snapmgr.c:117
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:842
static TransactionId parseXidFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1341
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:457
void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
Definition: snapmgr.c:697
Snapshot GetNonHistoricCatalogSnapshot(Oid relid)
Definition: snapmgr.c:409
struct ExportedSnapshot ExportedSnapshot
volatile OldSnapshotControlData * oldSnapshotControl
Definition: snapmgr.c:81
Size SnapMgrShmemSize(void)
Definition: snapmgr.c:198
Snapshot GetOldestSnapshot(void)
Definition: snapmgr.c:358
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1717
static Snapshot CatalogSnapshot
Definition: snapmgr.c:105
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:805
Datum pg_export_snapshot(PG_FUNCTION_ARGS)
Definition: snapmgr.c:1301
#define RelationAllowsEarlyPruning(rel)
Definition: snapmgr.h:38
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
#define OLD_SNAPSHOT_TIME_MAP_ENTRIES
Definition: snapmgr.h:32
struct SnapshotData * Snapshot
Definition: snapshot.h:121
struct SnapshotData SnapshotData
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
#define InvalidSnapshot
Definition: snapshot.h:123
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:66
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ActiveSnapshotElt * as_next
Definition: snapmgr.c:131
Snapshot as_snap
Definition: snapmgr.c:129
Definition: dirent.c:26
char * snapfile
Definition: snapmgr.c:165
Snapshot snapshot
Definition: snapmgr.c:166
Definition: dynahash.c:220
Definition: pg_list.h:54
TimestampTz next_map_update
Definition: old_snapshot.h:34
TimestampTz threshold_timestamp
Definition: old_snapshot.h:36
TransactionId latest_xmin
Definition: old_snapshot.h:33
TimestampTz head_timestamp
Definition: old_snapshot.h:68
TimestampTz current_timestamp
Definition: old_snapshot.h:31
TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER]
Definition: old_snapshot.h:70
TransactionId threshold_xid
Definition: old_snapshot.h:37
Definition: proc.h:162
TransactionId xmin
Definition: proc.h:178
LocalTransactionId lxid
Definition: proc.h:183
BackendId backendId
Definition: proc.h:197
TransactionId xmax
Definition: snapmgr.c:187
TimestampTz whenTaken
Definition: snapmgr.c:193
TransactionId xmin
Definition: snapmgr.c:186
TransactionId xmin
Definition: snapshot.h:157
int32 subxcnt
Definition: snapshot.h:181
bool copied
Definition: snapshot.h:185
uint32 regd_count
Definition: snapshot.h:205
uint32 active_count
Definition: snapshot.h:204
CommandId curcid
Definition: snapshot.h:187
pairingheap_node ph_node
Definition: snapshot.h:206
TimestampTz whenTaken
Definition: snapshot.h:208
uint32 xcnt
Definition: snapshot.h:169
TransactionId * subxip
Definition: snapshot.h:180
uint64 snapXactCompletionCount
Definition: snapshot.h:216
TransactionId xmax
Definition: snapshot.h:158
XLogRecPtr lsn
Definition: snapshot.h:209
SnapshotType snapshot_type
Definition: snapshot.h:144
TransactionId * xip
Definition: snapshot.h:168
bool suboverflowed
Definition: snapshot.h:182
bool takenDuringRecovery
Definition: snapshot.h:184
LocalTransactionId localTransactionId
Definition: lock.h:62
BackendId backendId
Definition: lock.h:61
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
__int64 st_size
Definition: win32_port.h:275
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:150
bool RelationHasSysCache(Oid relid)
Definition: syscache.c:1224
bool RelationInvalidatesSnapshotsOnly(Oid relid)
Definition: syscache.c:1201
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
#define InvalidTransactionId
Definition: transam.h:31
#define FirstNormalTransactionId
Definition: transam.h:34
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
text * cstring_to_text(const char *s)
Definition: varlena.c:182
#define fstat
Definition: win32_port.h:285
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:914
bool XactReadOnly
Definition: xact.c:82
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:432
int XactIsoLevel
Definition: xact.c:79
bool IsSubTransaction(void)
Definition: xact.c:4890
bool IsInParallelMode(void)
Definition: xact.c:1069
int xactGetCommittedChildren(TransactionId **ptr)
Definition: xact.c:5614
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:818
#define XACT_SERIALIZABLE
Definition: xact.h:39
#define IsolationUsesXactSnapshot()
Definition: xact.h:51
#define IsolationIsSerializable()
Definition: xact.h:52
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28