PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
snapmgr.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapmgr.c
4  * PostgreSQL snapshot manager
5  *
6  * We keep track of snapshots in two ways: those "registered" by resowner.c,
7  * and the "active snapshot" stack. All snapshots in either of them live in
8  * persistent memory. When a snapshot is no longer in any of these lists
9  * (tracked by separate refcounts on each snapshot), its memory can be freed.
10  *
11  * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12  * regd_count and list it in RegisteredSnapshots, but this reference is not
13  * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14  * to track this snapshot reference, but that introduces logical circularity
15  * and thus makes it impossible to clean up in a sane fashion. It's better to
16  * handle this reference as an internally-tracked registration, so that this
17  * module is entirely lower-level than ResourceOwners.
18  *
19  * Likewise, any snapshots that have been exported by pg_export_snapshot
20  * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21  * tracked by any resource owner.
22  *
23  * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24  * is valid, but is not tracked by any resource owner.
25  *
26  * The same is true for historic snapshots used during logical decoding;
27  * their lifetime is managed separately (as they live longer than one xact.c
28  * transaction).
29  *
30  * These arrangements let us reset MyPgXact->xmin when there are no snapshots
31  * referenced by this transaction, and advance it when the one with oldest
32  * Xmin is no longer referenced. For simplicity, however, only registered
33  * snapshots, not active snapshots, participate in tracking which one is oldest;
34  * we don't try to change MyPgXact->xmin except when the active-snapshot
35  * stack is empty.
36  *
37  *
38  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
39  * Portions Copyright (c) 1994, Regents of the University of California
40  *
41  * IDENTIFICATION
42  * src/backend/utils/time/snapmgr.c
43  *
44  *-------------------------------------------------------------------------
45  */
46 #include "postgres.h"
47 
48 #include <sys/stat.h>
49 #include <unistd.h>
50 
51 #include "access/transam.h"
52 #include "access/xact.h"
53 #include "access/xlog.h"
54 #include "catalog/catalog.h"
55 #include "lib/pairingheap.h"
56 #include "miscadmin.h"
57 #include "storage/predicate.h"
58 #include "storage/proc.h"
59 #include "storage/procarray.h"
60 #include "storage/sinval.h"
61 #include "storage/spin.h"
62 #include "utils/builtins.h"
63 #include "utils/memutils.h"
64 #include "utils/rel.h"
65 #include "utils/resowner_private.h"
66 #include "utils/snapmgr.h"
67 #include "utils/syscache.h"
68 #include "utils/tqual.h"
69 
70 
71 /*
72  * GUC parameters
73  */
74 int old_snapshot_threshold; /* number of minutes, -1 disables */
75 
76 /*
77  * Structure for dealing with old_snapshot_threshold implementation.
78  */
79 typedef struct OldSnapshotControlData
80 {
81  /*
82  * Variables for old snapshot handling are shared among processes and are
83  * only allowed to move forward.
84  */
85  slock_t mutex_current; /* protect current_timestamp */
86  TimestampTz current_timestamp; /* latest snapshot timestamp */
87  slock_t mutex_latest_xmin; /* protect latest_xmin and
88  * next_map_update */
89  TransactionId latest_xmin; /* latest snapshot xmin */
90  TimestampTz next_map_update; /* latest snapshot valid up to */
91  slock_t mutex_threshold; /* protect threshold fields */
92  TimestampTz threshold_timestamp; /* earlier snapshot is old */
93  TransactionId threshold_xid; /* earlier xid may be gone */
94 
95  /*
96  * Keep one xid per minute for old snapshot error handling.
97  *
98  * Use a circular buffer with a head offset, a count of entries currently
99  * used, and a timestamp corresponding to the xid at the head offset. A
100  * count_used value of zero means that there are no times stored; a
101  * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
102  * is full and the head must be advanced to add new entries. Use
103  * timestamps aligned to minute boundaries, since that seems less
104  * surprising than aligning based on the first usage timestamp. The
105  * latest bucket is effectively stored within latest_xmin. The circular
106  * buffer is updated when we get a new xmin value that doesn't fall into
107  * the same interval.
108  *
109  * It is OK if the xid for a given time slot is from earlier than
110  * calculated by adding the number of minutes corresponding to the
111  * (possibly wrapped) distance from the head offset to the time of the
112  * head entry, since that just results in the vacuuming of old tuples
113  * being slightly less aggressive. It would not be OK for it to be off in
114  * the other direction, since it might result in vacuuming tuples that are
115  * still expected to be there.
116  *
117  * Use of an SLRU was considered but not chosen because it is more
118  * heavyweight than is needed for this, and would probably not be any less
119  * code to implement.
120  *
121  * Persistence is not needed.
122  */
123  int head_offset; /* subscript of oldest tracked time */
124  TimestampTz head_timestamp; /* time corresponding to head xid */
125  int count_used; /* how many slots are in use */
126  TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
128 
130 
131 
132 /*
133  * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
134  * mode, and to the latest one taken in a read-committed transaction.
135  * SecondarySnapshot is a snapshot that's always up-to-date as of the current
136  * instant, even in transaction-snapshot mode. It should only be used for
137  * special-purpose code (say, RI checking.) CatalogSnapshot points to an
138  * MVCC snapshot intended to be used for catalog scans; we must invalidate it
139  * whenever a system catalog change occurs.
140  *
141  * These SnapshotData structs are static to simplify memory allocation
142  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
143  */
147 
148 /* Pointers to valid snapshots */
153 
154 /*
155  * These are updated by GetSnapshotData. We initialize them this way
156  * for the convenience of TransactionIdIsInProgress: even in bootstrap
157  * mode, we don't want it to say that BootstrapTransactionId is in progress.
158  *
159  * RecentGlobalXmin and RecentGlobalDataXmin are initialized to
160  * InvalidTransactionId, to ensure that no one tries to use a stale
161  * value. Readers should ensure that they have been set to something else
162  * before using them.
163  */
168 
169 /* (table, ctid) => (cmin, cmax) mapping during timetravel */
171 
172 /*
173  * Elements of the active snapshot stack.
174  *
175  * Each element here accounts for exactly one active_count on SnapshotData.
176  *
177  * NB: the code assumes that elements in this list are in non-increasing
178  * order of as_level; also, the list must be NULL-terminated.
179  */
180 typedef struct ActiveSnapshotElt
181 {
183  int as_level;
186 
187 /* Top of the stack of active snapshots */
189 
190 /* Bottom of the stack of active snapshots */
192 
193 /*
194  * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
195  * quickly find the one with lowest xmin, to advance our MyPgXact->xmin.
196  */
197 static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
198  void *arg);
199 
201 
202 /* first GetTransactionSnapshot call in a transaction? */
203 bool FirstSnapshotSet = false;
204 
205 /*
206  * Remember the serializable transaction snapshot, if any. We cannot trust
207  * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
208  * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
209  */
211 
/*
 * Define pathname of exported-snapshot files.
 *
 * XactExportFilePath(path, xid, num, suffix) formats
 *		SNAPSHOT_EXPORT_DIR "/" <xid as 8 uppercase hex digits> "-" <num> <suffix>
 * into path.
 *
 * The buffer size is obtained with sizeof(path), so "path" must be a real
 * char array that is visible at the point of use, not a pointer to a buffer.
 * The macro expands to the snprintf() call itself, so its value is
 * snprintf's return value (the length the full name would need).
 *
 * All arguments are parenthesized in the expansion so that arbitrary
 * expressions can be passed safely.
 */
#define SNAPSHOT_EXPORT_DIR "pg_snapshots"
#define XactExportFilePath(path, xid, num, suffix) \
	snprintf((path), sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%d%s", \
			 (xid), (num), (suffix))
217 
218 /* Current xact's exported snapshots (a list of Snapshot structs) */
220 
221 /* Prototypes for local functions */
223 static Snapshot CopySnapshot(Snapshot snapshot);
224 static void FreeSnapshot(Snapshot snapshot);
225 static void SnapshotResetXmin(void);
226 
227 /*
228  * Snapshot fields to be serialized.
229  *
230  * Only these fields need to be sent to the cooperating backend; the
231  * remaining ones can (and must) be set by the receiver upon restore.
232  */
234 {
245 
246 Size
248 {
249  Size size;
250 
251  size = offsetof(OldSnapshotControlData, xid_by_minute);
252  if (old_snapshot_threshold > 0)
253  size = add_size(size, mul_size(sizeof(TransactionId),
255 
256  return size;
257 }
258 
259 /*
260  * Initialize for managing old snapshot detection.
261  */
262 void
264 {
265  bool found;
266 
267  /*
268  * Create or attach to the OldSnapshotControlData structure.
269  */
270  oldSnapshotControl = (volatile OldSnapshotControlData *)
271  ShmemInitStruct("OldSnapshotControlData",
272  SnapMgrShmemSize(), &found);
273 
274  if (!found)
275  {
276  SpinLockInit(&oldSnapshotControl->mutex_current);
277  oldSnapshotControl->current_timestamp = 0;
278  SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
279  oldSnapshotControl->latest_xmin = InvalidTransactionId;
280  oldSnapshotControl->next_map_update = 0;
281  SpinLockInit(&oldSnapshotControl->mutex_threshold);
282  oldSnapshotControl->threshold_timestamp = 0;
283  oldSnapshotControl->threshold_xid = InvalidTransactionId;
284  oldSnapshotControl->head_offset = 0;
285  oldSnapshotControl->head_timestamp = 0;
286  oldSnapshotControl->count_used = 0;
287  }
288 }
289 
290 /*
291  * GetTransactionSnapshot
292  * Get the appropriate snapshot for a new query in a transaction.
293  *
294  * Note that the return value may point at static storage that will be modified
295  * by future calls and by CommandCounterIncrement(). Callers should call
296  * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
297  * used very long.
298  */
299 Snapshot
301 {
302  /*
303  * Return historic snapshot if doing logical decoding. We'll never need a
304  * non-historic transaction snapshot in this (sub-)transaction, so there's
305  * no need to be careful to set one up for later calls to
306  * GetTransactionSnapshot().
307  */
309  {
311  return HistoricSnapshot;
312  }
313 
314  /* First call in transaction? */
315  if (!FirstSnapshotSet)
316  {
317  /*
318  * Don't allow catalog snapshot to be older than xact snapshot. Must
319  * do this first to allow the empty-heap Assert to succeed.
320  */
322 
323  Assert(pairingheap_is_empty(&RegisteredSnapshots));
324  Assert(FirstXactSnapshot == NULL);
325 
326  if (IsInParallelMode())
327  elog(ERROR,
328  "cannot take query snapshot during a parallel operation");
329 
330  /*
331  * In transaction-snapshot mode, the first snapshot must live until
332  * end of xact regardless of what the caller does with it, so we must
333  * make a copy of it rather than returning CurrentSnapshotData
334  * directly. Furthermore, if we're running in serializable mode,
335  * predicate.c needs to wrap the snapshot fetch in its own processing.
336  */
338  {
339  /* First, create the snapshot in CurrentSnapshotData */
341  CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
342  else
343  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
344  /* Make a saved copy */
345  CurrentSnapshot = CopySnapshot(CurrentSnapshot);
346  FirstXactSnapshot = CurrentSnapshot;
347  /* Mark it as "registered" in FirstXactSnapshot */
348  FirstXactSnapshot->regd_count++;
349  pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
350  }
351  else
352  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
353 
354  FirstSnapshotSet = true;
355  return CurrentSnapshot;
356  }
357 
359  return CurrentSnapshot;
360 
361  /* Don't allow catalog snapshot to be older than xact snapshot. */
363 
364  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
365 
366  return CurrentSnapshot;
367 }
368 
369 /*
370  * GetLatestSnapshot
371  * Get a snapshot that is up-to-date as of the current instant,
372  * even if we are executing in transaction-snapshot mode.
373  */
374 Snapshot
376 {
377  /*
378  * We might be able to relax this, but nothing that could otherwise work
379  * needs it.
380  */
381  if (IsInParallelMode())
382  elog(ERROR,
383  "cannot update SecondarySnapshot during a parallel operation");
384 
385  /*
386  * So far there are no cases requiring support for GetLatestSnapshot()
387  * during logical decoding, but it wouldn't be hard to add if required.
388  */
390 
391  /* If first call in transaction, go ahead and set the xact snapshot */
392  if (!FirstSnapshotSet)
393  return GetTransactionSnapshot();
394 
395  SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
396 
397  return SecondarySnapshot;
398 }
399 
400 /*
401  * GetOldestSnapshot
402  *
403  * Get the transaction's oldest known snapshot, as judged by the LSN.
404  * Will return NULL if there are no active or registered snapshots.
405  */
406 Snapshot
408 {
409  Snapshot OldestRegisteredSnapshot = NULL;
410  XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
411 
412  if (!pairingheap_is_empty(&RegisteredSnapshots))
413  {
414  OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
415  pairingheap_first(&RegisteredSnapshots));
416  RegisteredLSN = OldestRegisteredSnapshot->lsn;
417  }
418 
419  if (OldestActiveSnapshot != NULL)
420  {
421  XLogRecPtr ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
422 
423  if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
424  return OldestActiveSnapshot->as_snap;
425  }
426 
427  return OldestRegisteredSnapshot;
428 }
429 
430 /*
431  * GetCatalogSnapshot
432  * Get a snapshot that is sufficiently up-to-date for scan of the
433  * system catalog with the specified OID.
434  */
435 Snapshot
437 {
438  /*
439  * Return historic snapshot while we're doing logical decoding, so we can
440  * see the appropriate state of the catalog.
441  *
442  * This is the primary reason for needing to reset the system caches after
443  * finishing decoding.
444  */
446  return HistoricSnapshot;
447 
448  return GetNonHistoricCatalogSnapshot(relid);
449 }
450 
451 /*
452  * GetNonHistoricCatalogSnapshot
453  * Get a snapshot that is sufficiently up-to-date for scan of the system
454  * catalog with the specified OID, even while historic snapshots are set
455  * up.
456  */
457 Snapshot
459 {
460  /*
461  * If the caller is trying to scan a relation that has no syscache, no
462  * catcache invalidations will be sent when it is updated. For a few key
463  * relations, snapshot invalidations are sent instead. If we're trying to
464  * scan a relation for which neither catcache nor snapshot invalidations
465  * are sent, we must refresh the snapshot every time.
466  */
467  if (CatalogSnapshot &&
469  !RelationHasSysCache(relid))
471 
472  if (CatalogSnapshot == NULL)
473  {
474  /* Get new snapshot. */
475  CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
476 
477  /*
478  * Make sure the catalog snapshot will be accounted for in decisions
479  * about advancing PGXACT->xmin. We could apply RegisterSnapshot, but
480  * that would result in making a physical copy, which is overkill; and
481  * it would also create a dependency on some resource owner, which we
482  * do not want for reasons explained at the head of this file. Instead
483  * just shove the CatalogSnapshot into the pairing heap manually. This
484  * has to be reversed in InvalidateCatalogSnapshot, of course.
485  *
486  * NB: it had better be impossible for this to throw error, since the
487  * CatalogSnapshot pointer is already valid.
488  */
489  pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
490  }
491 
492  return CatalogSnapshot;
493 }
494 
495 /*
496  * InvalidateCatalogSnapshot
497  * Mark the current catalog snapshot, if any, as invalid
498  *
499  * We could change this API to allow the caller to provide more fine-grained
500  * invalidation details, so that a change to relation A wouldn't prevent us
501  * from using our cached snapshot to scan relation B, but so far there's no
502  * evidence that the CPU cycles we spent tracking such fine details would be
503  * well-spent.
504  */
505 void
507 {
508  if (CatalogSnapshot)
509  {
510  pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
511  CatalogSnapshot = NULL;
513  }
514 }
515 
516 /*
517  * InvalidateCatalogSnapshotConditionally
518  * Drop catalog snapshot if it's the only one we have
519  *
520  * This is called when we are about to wait for client input, so we don't
521  * want to continue holding the catalog snapshot if it might mean that the
522  * global xmin horizon can't advance. However, if there are other snapshots
523  * still active or registered, the catalog snapshot isn't likely to be the
524  * oldest one, so we might as well keep it.
525  */
526 void
528 {
529  if (CatalogSnapshot &&
530  ActiveSnapshot == NULL &&
531  pairingheap_is_singular(&RegisteredSnapshots))
533 }
534 
535 /*
536  * SnapshotSetCommandId
537  * Propagate CommandCounterIncrement into the static snapshots, if set
538  */
539 void
541 {
542  if (!FirstSnapshotSet)
543  return;
544 
545  if (CurrentSnapshot)
546  CurrentSnapshot->curcid = curcid;
547  if (SecondarySnapshot)
548  SecondarySnapshot->curcid = curcid;
549  /* Should we do the same with CatalogSnapshot? */
550 }
551 
552 /*
553  * SetTransactionSnapshot
554  * Set the transaction's snapshot from an imported MVCC snapshot.
555  *
556  * Note that this is very closely tied to GetTransactionSnapshot --- it
557  * must take care of all the same considerations as the first-snapshot case
558  * in GetTransactionSnapshot.
559  */
560 static void
562  PGPROC *sourceproc)
563 {
564  /* Caller should have checked this already */
566 
567  /* Better do this to ensure following Assert succeeds. */
569 
570  Assert(pairingheap_is_empty(&RegisteredSnapshots));
571  Assert(FirstXactSnapshot == NULL);
573 
574  /*
575  * Even though we are not going to use the snapshot it computes, we must
576  * call GetSnapshotData, for two reasons: (1) to be sure that
577  * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
578  * RecentXmin and RecentGlobalXmin. (We could alternatively include those
579  * two variables in exported snapshot files, but it seems better to have
580  * snapshot importers compute reasonably up-to-date values for them.)
581  */
582  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
583 
584  /*
585  * Now copy appropriate fields from the source snapshot.
586  */
587  CurrentSnapshot->xmin = sourcesnap->xmin;
588  CurrentSnapshot->xmax = sourcesnap->xmax;
589  CurrentSnapshot->xcnt = sourcesnap->xcnt;
590  Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
591  memcpy(CurrentSnapshot->xip, sourcesnap->xip,
592  sourcesnap->xcnt * sizeof(TransactionId));
593  CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
594  Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
595  memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
596  sourcesnap->subxcnt * sizeof(TransactionId));
597  CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
598  CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
599  /* NB: curcid should NOT be copied, it's a local matter */
600 
601  /*
602  * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and
603  * TransactionXmin. There is a race condition: to make sure we are not
604  * causing the global xmin to go backwards, we have to test that the
605  * source transaction is still running, and that has to be done
606  * atomically. So let procarray.c do it.
607  *
608  * Note: in serializable mode, predicate.c will do this a second time. It
609  * doesn't seem worth contorting the logic here to avoid two calls,
610  * especially since it's not clear that predicate.c *must* do this.
611  */
612  if (sourceproc != NULL)
613  {
614  if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
615  ereport(ERROR,
616  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
617  errmsg("could not import the requested snapshot"),
618  errdetail("The source transaction is not running anymore.")));
619  }
620  else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcexid))
621  ereport(ERROR,
622  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
623  errmsg("could not import the requested snapshot"),
624  errdetail("The source transaction %u is not running anymore.",
625  sourcexid)));
626 
627  /*
628  * In transaction-snapshot mode, the first snapshot must live until end of
629  * xact, so we must make a copy of it. Furthermore, if we're running in
630  * serializable mode, predicate.c needs to do its own processing.
631  */
633  {
635  SetSerializableTransactionSnapshot(CurrentSnapshot, sourcexid);
636  /* Make a saved copy */
637  CurrentSnapshot = CopySnapshot(CurrentSnapshot);
638  FirstXactSnapshot = CurrentSnapshot;
639  /* Mark it as "registered" in FirstXactSnapshot */
640  FirstXactSnapshot->regd_count++;
641  pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
642  }
643 
644  FirstSnapshotSet = true;
645 }
646 
647 /*
648  * CopySnapshot
649  * Copy the given snapshot.
650  *
651  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
652  * to 0. The returned snapshot has the copied flag set.
653  */
654 static Snapshot
656 {
657  Snapshot newsnap;
658  Size subxipoff;
659  Size size;
660 
661  Assert(snapshot != InvalidSnapshot);
662 
663  /* We allocate any XID arrays needed in the same palloc block. */
664  size = subxipoff = sizeof(SnapshotData) +
665  snapshot->xcnt * sizeof(TransactionId);
666  if (snapshot->subxcnt > 0)
667  size += snapshot->subxcnt * sizeof(TransactionId);
668 
670  memcpy(newsnap, snapshot, sizeof(SnapshotData));
671 
672  newsnap->regd_count = 0;
673  newsnap->active_count = 0;
674  newsnap->copied = true;
675 
676  /* setup XID array */
677  if (snapshot->xcnt > 0)
678  {
679  newsnap->xip = (TransactionId *) (newsnap + 1);
680  memcpy(newsnap->xip, snapshot->xip,
681  snapshot->xcnt * sizeof(TransactionId));
682  }
683  else
684  newsnap->xip = NULL;
685 
686  /*
687  * Setup subXID array. Don't bother to copy it if it had overflowed,
688  * though, because it's not used anywhere in that case. Except if it's a
689  * snapshot taken during recovery; all the top-level XIDs are in subxip as
690  * well in that case, so we mustn't lose them.
691  */
692  if (snapshot->subxcnt > 0 &&
693  (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
694  {
695  newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
696  memcpy(newsnap->subxip, snapshot->subxip,
697  snapshot->subxcnt * sizeof(TransactionId));
698  }
699  else
700  newsnap->subxip = NULL;
701 
702  return newsnap;
703 }
704 
705 /*
706  * FreeSnapshot
707  * Free the memory associated with a snapshot.
708  */
709 static void
711 {
712  Assert(snapshot->regd_count == 0);
713  Assert(snapshot->active_count == 0);
714  Assert(snapshot->copied);
715 
716  pfree(snapshot);
717 }
718 
719 /*
720  * PushActiveSnapshot
721  * Set the given snapshot as the current active snapshot
722  *
723  * If the passed snapshot is a statically-allocated one, or it is possibly
724  * subject to a future command counter update, create a new long-lived copy
725  * with active refcount=1. Otherwise, only increment the refcount.
726  */
727 void
729 {
730  ActiveSnapshotElt *newactive;
731 
732  Assert(snap != InvalidSnapshot);
733 
735 
736  /*
737  * Checking SecondarySnapshot is probably useless here, but it seems
738  * better to be sure.
739  */
740  if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied)
741  newactive->as_snap = CopySnapshot(snap);
742  else
743  newactive->as_snap = snap;
744 
745  newactive->as_next = ActiveSnapshot;
747 
748  newactive->as_snap->active_count++;
749 
750  ActiveSnapshot = newactive;
751  if (OldestActiveSnapshot == NULL)
752  OldestActiveSnapshot = ActiveSnapshot;
753 }
754 
755 /*
756  * PushCopiedSnapshot
757  * As above, except forcibly copy the presented snapshot.
758  *
759  * This should be used when the ActiveSnapshot has to be modifiable, for
760  * example if the caller intends to call UpdateActiveSnapshotCommandId.
761  * The new snapshot will be released when popped from the stack.
762  */
763 void
765 {
766  PushActiveSnapshot(CopySnapshot(snapshot));
767 }
768 
769 /*
770  * UpdateActiveSnapshotCommandId
771  *
772  * Update the current CID of the active snapshot. This can only be applied
773  * to a snapshot that is not referenced elsewhere.
774  */
775 void
777 {
778  CommandId save_curcid,
779  curcid;
780 
781  Assert(ActiveSnapshot != NULL);
782  Assert(ActiveSnapshot->as_snap->active_count == 1);
783  Assert(ActiveSnapshot->as_snap->regd_count == 0);
784 
785  /*
786  * Don't allow modification of the active snapshot during parallel
787  * operation. We share the snapshot to worker backends at the beginning
788  * of parallel operation, so any change to the snapshot can lead to
789  * inconsistencies. We have other defenses against
790  * CommandCounterIncrement, but there are a few places that call this
791  * directly, so we put an additional guard here.
792  */
793  save_curcid = ActiveSnapshot->as_snap->curcid;
794  curcid = GetCurrentCommandId(false);
795  if (IsInParallelMode() && save_curcid != curcid)
796  elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
797  ActiveSnapshot->as_snap->curcid = curcid;
798 }
799 
800 /*
801  * PopActiveSnapshot
802  *
803  * Remove the topmost snapshot from the active snapshot stack, decrementing the
804  * reference count, and free it if this was the last reference.
805  */
806 void
808 {
809  ActiveSnapshotElt *newstack;
810 
811  newstack = ActiveSnapshot->as_next;
812 
813  Assert(ActiveSnapshot->as_snap->active_count > 0);
814 
815  ActiveSnapshot->as_snap->active_count--;
816 
817  if (ActiveSnapshot->as_snap->active_count == 0 &&
818  ActiveSnapshot->as_snap->regd_count == 0)
819  FreeSnapshot(ActiveSnapshot->as_snap);
820 
821  pfree(ActiveSnapshot);
822  ActiveSnapshot = newstack;
823  if (ActiveSnapshot == NULL)
824  OldestActiveSnapshot = NULL;
825 
827 }
828 
829 /*
830  * GetActiveSnapshot
831  * Return the topmost snapshot in the Active stack.
832  */
833 Snapshot
835 {
836  Assert(ActiveSnapshot != NULL);
837 
838  return ActiveSnapshot->as_snap;
839 }
840 
841 /*
842  * ActiveSnapshotSet
843  * Return whether there is at least one snapshot in the Active stack
844  */
845 bool
847 {
848  return ActiveSnapshot != NULL;
849 }
850 
851 /*
852  * RegisterSnapshot
853  * Register a snapshot as being in use by the current resource owner
854  *
855  * If InvalidSnapshot is passed, it is not registered.
856  */
857 Snapshot
859 {
860  if (snapshot == InvalidSnapshot)
861  return InvalidSnapshot;
862 
864 }
865 
866 /*
867  * RegisterSnapshotOnOwner
868  * As above, but use the specified resource owner
869  */
870 Snapshot
872 {
873  Snapshot snap;
874 
875  if (snapshot == InvalidSnapshot)
876  return InvalidSnapshot;
877 
878  /* Static snapshot? Create a persistent copy */
879  snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
880 
881  /* and tell resowner.c about it */
883  snap->regd_count++;
884  ResourceOwnerRememberSnapshot(owner, snap);
885 
886  if (snap->regd_count == 1)
887  pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
888 
889  return snap;
890 }
891 
892 /*
893  * UnregisterSnapshot
894  *
895  * Decrement the reference count of a snapshot, remove the corresponding
896  * reference from CurrentResourceOwner, and free the snapshot if no more
897  * references remain.
898  */
899 void
901 {
902  if (snapshot == NULL)
903  return;
904 
906 }
907 
908 /*
909  * UnregisterSnapshotFromOwner
910  * As above, but use the specified resource owner
911  */
912 void
914 {
915  if (snapshot == NULL)
916  return;
917 
918  Assert(snapshot->regd_count > 0);
919  Assert(!pairingheap_is_empty(&RegisteredSnapshots));
920 
921  ResourceOwnerForgetSnapshot(owner, snapshot);
922 
923  snapshot->regd_count--;
924  if (snapshot->regd_count == 0)
925  pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
926 
927  if (snapshot->regd_count == 0 && snapshot->active_count == 0)
928  {
929  FreeSnapshot(snapshot);
931  }
932 }
933 
934 /*
935  * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
936  * by xmin, so that the snapshot with smallest xmin is at the top.
937  */
938 static int
939 xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
940 {
941  const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
942  const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
943 
944  if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
945  return 1;
946  else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
947  return -1;
948  else
949  return 0;
950 }
951 
952 /*
953  * SnapshotResetXmin
954  *
955  * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid.
956  * Note we can do this without locking because we assume that storing an Xid
957  * is atomic.
958  *
959  * Even if there are some remaining snapshots, we may be able to advance our
960  * PGXACT->xmin to some degree. This typically happens when a portal is
961  * dropped. For efficiency, we only consider recomputing PGXACT->xmin when
962  * the active snapshot stack is empty; this allows us not to need to track
963  * which active snapshot is oldest.
964  *
965  * Note: it's tempting to use GetOldestSnapshot() here so that we can include
966  * active snapshots in the calculation. However, that compares by LSN not
967  * xmin so it's not entirely clear that it's the same thing. Also, we'd be
968  * critically dependent on the assumption that the bottommost active snapshot
969  * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are
970  * not actually critical, but this would be.)
971  */
972 static void
974 {
975  Snapshot minSnapshot;
976 
977  if (ActiveSnapshot != NULL)
978  return;
979 
980  if (pairingheap_is_empty(&RegisteredSnapshots))
981  {
983  return;
984  }
985 
986  minSnapshot = pairingheap_container(SnapshotData, ph_node,
987  pairingheap_first(&RegisteredSnapshots));
988 
989  if (TransactionIdPrecedes(MyPgXact->xmin, minSnapshot->xmin))
990  MyPgXact->xmin = minSnapshot->xmin;
991 }
992 
993 /*
994  * AtSubCommit_Snapshot
995  */
996 void
998 {
999  ActiveSnapshotElt *active;
1000 
1001  /*
1002  * Relabel the active snapshots set in this subtransaction as though they
1003  * are owned by the parent subxact.
1004  */
1005  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1006  {
1007  if (active->as_level < level)
1008  break;
1009  active->as_level = level - 1;
1010  }
1011 }
1012 
1013 /*
1014  * AtSubAbort_Snapshot
1015  * Clean up snapshots after a subtransaction abort
1016  */
1017 void
1019 {
1020  /* Forget the active snapshots set by this subtransaction */
1021  while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
1022  {
1024 
1025  next = ActiveSnapshot->as_next;
1026 
1027  /*
1028  * Decrement the snapshot's active count. If it's still registered or
1029  * marked as active by an outer subtransaction, we can't free it yet.
1030  */
1031  Assert(ActiveSnapshot->as_snap->active_count >= 1);
1032  ActiveSnapshot->as_snap->active_count -= 1;
1033 
1034  if (ActiveSnapshot->as_snap->active_count == 0 &&
1035  ActiveSnapshot->as_snap->regd_count == 0)
1036  FreeSnapshot(ActiveSnapshot->as_snap);
1037 
1038  /* and free the stack element */
1039  pfree(ActiveSnapshot);
1040 
1041  ActiveSnapshot = next;
1042  if (ActiveSnapshot == NULL)
1043  OldestActiveSnapshot = NULL;
1044  }
1045 
1047 }
1048 
1049 /*
1050  * AtEOXact_Snapshot
1051  * Snapshot manager's cleanup function for end of transaction
1052  */
1053 void
1054 AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1055 {
1056  /*
1057  * In transaction-snapshot mode we must release our privately-managed
1058  * reference to the transaction snapshot. We must remove it from
1059  * RegisteredSnapshots to keep the check below happy. But we don't bother
1060  * to do FreeSnapshot, for two reasons: the memory will go away with
1061  * TopTransactionContext anyway, and if someone has left the snapshot
1062  * stacked as active, we don't want the code below to be chasing through a
1063  * dangling pointer.
1064  */
1065  if (FirstXactSnapshot != NULL)
1066  {
1067  Assert(FirstXactSnapshot->regd_count > 0);
1068  Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1069  pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1070  }
1071  FirstXactSnapshot = NULL;
1072 
1073  /*
1074  * If we exported any snapshots, clean them up.
1075  */
1076  if (exportedSnapshots != NIL)
1077  {
1079  int i;
1080  char buf[MAXPGPATH];
1081  ListCell *lc;
1082 
1083  /*
1084  * Get rid of the files. Unlink failure is only a WARNING because (1)
1085  * it's too late to abort the transaction, and (2) leaving a leaked
1086  * file around has little real consequence anyway.
1087  */
1088  for (i = 1; i <= list_length(exportedSnapshots); i++)
1089  {
1090  XactExportFilePath(buf, myxid, i, "");
1091  if (unlink(buf))
1092  elog(WARNING, "could not unlink file \"%s\": %m", buf);
1093  }
1094 
1095  /*
1096  * As with the FirstXactSnapshot, we needn't spend any effort on
1097  * cleaning up the per-snapshot data structures, but we do need to
1098  * remove them from RegisteredSnapshots to prevent a warning below.
1099  */
1100  foreach(lc, exportedSnapshots)
1101  {
1102  Snapshot snap = (Snapshot) lfirst(lc);
1103 
1104  pairingheap_remove(&RegisteredSnapshots, &snap->ph_node);
1105  }
1106 
1107  exportedSnapshots = NIL;
1108  }
1109 
1110  /* Drop catalog snapshot if any */
1112 
1113  /* On commit, complain about leftover snapshots */
1114  if (isCommit)
1115  {
1116  ActiveSnapshotElt *active;
1117 
1118  if (!pairingheap_is_empty(&RegisteredSnapshots))
1119  elog(WARNING, "registered snapshots seem to remain after cleanup");
1120 
1121  /* complain about unpopped active snapshots */
1122  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1123  elog(WARNING, "snapshot %p still active", active);
1124  }
1125 
1126  /*
1127  * And reset our state. We don't need to free the memory explicitly --
1128  * it'll go away with TopTransactionContext.
1129  */
1130  ActiveSnapshot = NULL;
1131  OldestActiveSnapshot = NULL;
1132  pairingheap_reset(&RegisteredSnapshots);
1133 
1134  CurrentSnapshot = NULL;
1135  SecondarySnapshot = NULL;
1136 
1137  FirstSnapshotSet = false;
1138 
1139  /*
1140  * During normal commit processing, we call
1141  * ProcArrayEndTransaction() to reset the PgXact->xmin. That call
1142  * happens prior to the call to AtEOXact_Snapshot(), so we need
1143  * not touch xmin here at all.
1144  */
1145  if (resetXmin)
1147 
1148  Assert(resetXmin || MyPgXact->xmin == 0);
1149 }
1150 
1151 
1152 /*
1153  * ExportSnapshot
1154  * Export the snapshot to a file so that other backends can import it.
1155  * Returns the token (the file name) that can be used to import this
1156  * snapshot.
1157  */
1158 char *
1160 {
1161  TransactionId topXid;
1162  TransactionId *children;
1163  int nchildren;
1164  int addTopXid;
1166  FILE *f;
1167  int i;
1168  MemoryContext oldcxt;
1169  char path[MAXPGPATH];
1170  char pathtmp[MAXPGPATH];
1171 
1172  /*
1173  * It's tempting to call RequireTransactionChain here, since it's not very
1174  * useful to export a snapshot that will disappear immediately afterwards.
1175  * However, we haven't got enough information to do that, since we don't
1176  * know if we're at top level or not. For example, we could be inside a
1177  * plpgsql function that is going to fire off other transactions via
1178  * dblink. Rather than disallow perfectly legitimate usages, don't make a
1179  * check.
1180  *
1181  * Also note that we don't make any restriction on the transaction's
1182  * isolation level; however, importers must check the level if they are
1183  * serializable.
1184  */
1185 
1186  /*
1187  * This will assign a transaction ID if we do not yet have one.
1188  */
1189  topXid = GetTopTransactionId();
1190 
1191  /*
1192  * We cannot export a snapshot from a subtransaction because there's no
1193  * easy way for importers to verify that the same subtransaction is still
1194  * running.
1195  */
1196  if (IsSubTransaction())
1197  ereport(ERROR,
1198  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1199  errmsg("cannot export a snapshot from a subtransaction")));
1200 
1201  /*
1202  * We do however allow previous committed subtransactions to exist.
1203  * Importers of the snapshot must see them as still running, so get their
1204  * XIDs to add them to the snapshot.
1205  */
1206  nchildren = xactGetCommittedChildren(&children);
1207 
1208  /*
1209  * Copy the snapshot into TopTransactionContext, add it to the
1210  * exportedSnapshots list, and mark it pseudo-registered. We do this to
1211  * ensure that the snapshot's xmin is honored for the rest of the
1212  * transaction.
1213  */
1214  snapshot = CopySnapshot(snapshot);
1215 
1217  exportedSnapshots = lappend(exportedSnapshots, snapshot);
1218  MemoryContextSwitchTo(oldcxt);
1219 
1220  snapshot->regd_count++;
1221  pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1222 
1223  /*
1224  * Fill buf with a text serialization of the snapshot, plus identification
1225  * data about this transaction. The format expected by ImportSnapshot is
1226  * pretty rigid: each line must be fieldname:value.
1227  */
1228  initStringInfo(&buf);
1229 
1230  appendStringInfo(&buf, "xid:%u\n", topXid);
1231  appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1232  appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1233  appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1234 
1235  appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1236  appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1237 
1238  /*
1239  * We must include our own top transaction ID in the top-xid data, since
1240  * by definition we will still be running when the importing transaction
1241  * adopts the snapshot, but GetSnapshotData never includes our own XID in
1242  * the snapshot. (There must, therefore, be enough room to add it.)
1243  *
1244  * However, it could be that our topXid is after the xmax, in which case
1245  * we shouldn't include it because xip[] members are expected to be before
1246  * xmax. (We need not make the same check for subxip[] members, see
1247  * snapshot.h.)
1248  */
1249  addTopXid = TransactionIdPrecedes(topXid, snapshot->xmax) ? 1 : 0;
1250  appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1251  for (i = 0; i < snapshot->xcnt; i++)
1252  appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1253  if (addTopXid)
1254  appendStringInfo(&buf, "xip:%u\n", topXid);
1255 
1256  /*
1257  * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1258  * we have to cope with possible overflow.
1259  */
1260  if (snapshot->suboverflowed ||
1261  snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1262  appendStringInfoString(&buf, "sof:1\n");
1263  else
1264  {
1265  appendStringInfoString(&buf, "sof:0\n");
1266  appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1267  for (i = 0; i < snapshot->subxcnt; i++)
1268  appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1269  for (i = 0; i < nchildren; i++)
1270  appendStringInfo(&buf, "sxp:%u\n", children[i]);
1271  }
1272  appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1273 
1274  /*
1275  * Now write the text representation into a file. We first write to a
1276  * ".tmp" filename, and rename to final filename if no error. This
1277  * ensures that no other backend can read an incomplete file
1278  * (ImportSnapshot won't allow it because of its valid-characters check).
1279  */
1280  XactExportFilePath(pathtmp, topXid, list_length(exportedSnapshots), ".tmp");
1281  if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1282  ereport(ERROR,
1284  errmsg("could not create file \"%s\": %m", pathtmp)));
1285 
1286  if (fwrite(buf.data, buf.len, 1, f) != 1)
1287  ereport(ERROR,
1289  errmsg("could not write to file \"%s\": %m", pathtmp)));
1290 
1291  /* no fsync() since file need not survive a system crash */
1292 
1293  if (FreeFile(f))
1294  ereport(ERROR,
1296  errmsg("could not write to file \"%s\": %m", pathtmp)));
1297 
1298  /*
1299  * Now that we have written everything into a .tmp file, rename the file
1300  * to remove the .tmp suffix.
1301  */
1302  XactExportFilePath(path, topXid, list_length(exportedSnapshots), "");
1303 
1304  if (rename(pathtmp, path) < 0)
1305  ereport(ERROR,
1307  errmsg("could not rename file \"%s\" to \"%s\": %m",
1308  pathtmp, path)));
1309 
1310  /*
1311  * The basename of the file is what we return from pg_export_snapshot().
1312  * It's already in path in a textual format and we know that the path
1313  * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1314  * and pstrdup it so as not to return the address of a local variable.
1315  */
1316  return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1317 }
1318 
1319 /*
1320  * pg_export_snapshot
1321  * SQL-callable wrapper for ExportSnapshot.
1322  */
1323 Datum
1325 {
1326  char *snapshotName;
1327 
1328  snapshotName = ExportSnapshot(GetActiveSnapshot());
1329  PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1330 }
1331 
1332 
1333 /*
1334  * Parsing subroutines for ImportSnapshot: parse a line with the given
1335  * prefix followed by a value, and advance *s to the next line. The
1336  * filename is provided for use in error messages.
1337  */
1338 static int
1339 parseIntFromText(const char *prefix, char **s, const char *filename)
1340 {
1341  char *ptr = *s;
1342  int prefixlen = strlen(prefix);
1343  int val;
1344 
1345  if (strncmp(ptr, prefix, prefixlen) != 0)
1346  ereport(ERROR,
1347  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1348  errmsg("invalid snapshot data in file \"%s\"", filename)));
1349  ptr += prefixlen;
1350  if (sscanf(ptr, "%d", &val) != 1)
1351  ereport(ERROR,
1352  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1353  errmsg("invalid snapshot data in file \"%s\"", filename)));
1354  ptr = strchr(ptr, '\n');
1355  if (!ptr)
1356  ereport(ERROR,
1357  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1358  errmsg("invalid snapshot data in file \"%s\"", filename)));
1359  *s = ptr + 1;
1360  return val;
1361 }
1362 
1363 static TransactionId
1364 parseXidFromText(const char *prefix, char **s, const char *filename)
1365 {
1366  char *ptr = *s;
1367  int prefixlen = strlen(prefix);
1369 
1370  if (strncmp(ptr, prefix, prefixlen) != 0)
1371  ereport(ERROR,
1372  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1373  errmsg("invalid snapshot data in file \"%s\"", filename)));
1374  ptr += prefixlen;
1375  if (sscanf(ptr, "%u", &val) != 1)
1376  ereport(ERROR,
1377  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1378  errmsg("invalid snapshot data in file \"%s\"", filename)));
1379  ptr = strchr(ptr, '\n');
1380  if (!ptr)
1381  ereport(ERROR,
1382  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1383  errmsg("invalid snapshot data in file \"%s\"", filename)));
1384  *s = ptr + 1;
1385  return val;
1386 }
1387 
1388 /*
1389  * ImportSnapshot
1390  * Import a previously exported snapshot. The argument should be a
1391  * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1392  * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1393  */
1394 void
1395 ImportSnapshot(const char *idstr)
1396 {
1397  char path[MAXPGPATH];
1398  FILE *f;
1399  struct stat stat_buf;
1400  char *filebuf;
1401  int xcnt;
1402  int i;
1403  TransactionId src_xid;
1404  Oid src_dbid;
1405  int src_isolevel;
1406  bool src_readonly;
1407  SnapshotData snapshot;
1408 
1409  /*
1410  * Must be at top level of a fresh transaction. Note in particular that
1411  * we check we haven't acquired an XID --- if we have, it's conceivable
1412  * that the snapshot would show it as not running, making for very screwy
1413  * behavior.
1414  */
1415  if (FirstSnapshotSet ||
1417  IsSubTransaction())
1418  ereport(ERROR,
1419  (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1420  errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1421 
1422  /*
1423  * If we are in read committed mode then the next query would execute with
1424  * a new snapshot thus making this function call quite useless.
1425  */
1427  ereport(ERROR,
1428  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1429  errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1430 
1431  /*
1432  * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1433  * this mainly to prevent reading arbitrary files.
1434  */
1435  if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1436  ereport(ERROR,
1437  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1438  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1439 
1440  /* OK, read the file */
1441  snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1442 
1443  f = AllocateFile(path, PG_BINARY_R);
1444  if (!f)
1445  ereport(ERROR,
1446  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1447  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1448 
1449  /* get the size of the file so that we know how much memory we need */
1450  if (fstat(fileno(f), &stat_buf))
1451  elog(ERROR, "could not stat file \"%s\": %m", path);
1452 
1453  /* and read the file into a palloc'd string */
1454  filebuf = (char *) palloc(stat_buf.st_size + 1);
1455  if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1456  elog(ERROR, "could not read file \"%s\": %m", path);
1457 
1458  filebuf[stat_buf.st_size] = '\0';
1459 
1460  FreeFile(f);
1461 
1462  /*
1463  * Construct a snapshot struct by parsing the file content.
1464  */
1465  memset(&snapshot, 0, sizeof(snapshot));
1466 
1467  src_xid = parseXidFromText("xid:", &filebuf, path);
1468  /* we abuse parseXidFromText a bit here ... */
1469  src_dbid = parseXidFromText("dbid:", &filebuf, path);
1470  src_isolevel = parseIntFromText("iso:", &filebuf, path);
1471  src_readonly = parseIntFromText("ro:", &filebuf, path);
1472 
1473  snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1474  snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1475 
1476  snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1477 
1478  /* sanity-check the xid count before palloc */
1479  if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1480  ereport(ERROR,
1481  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1482  errmsg("invalid snapshot data in file \"%s\"", path)));
1483 
1484  snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1485  for (i = 0; i < xcnt; i++)
1486  snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1487 
1488  snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1489 
1490  if (!snapshot.suboverflowed)
1491  {
1492  snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1493 
1494  /* sanity-check the xid count before palloc */
1495  if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1496  ereport(ERROR,
1497  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1498  errmsg("invalid snapshot data in file \"%s\"", path)));
1499 
1500  snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1501  for (i = 0; i < xcnt; i++)
1502  snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1503  }
1504  else
1505  {
1506  snapshot.subxcnt = 0;
1507  snapshot.subxip = NULL;
1508  }
1509 
1510  snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1511 
1512  /*
1513  * Do some additional sanity checking, just to protect ourselves. We
1514  * don't trouble to check the array elements, just the most critical
1515  * fields.
1516  */
1517  if (!TransactionIdIsNormal(src_xid) ||
1518  !OidIsValid(src_dbid) ||
1519  !TransactionIdIsNormal(snapshot.xmin) ||
1520  !TransactionIdIsNormal(snapshot.xmax))
1521  ereport(ERROR,
1522  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1523  errmsg("invalid snapshot data in file \"%s\"", path)));
1524 
1525  /*
1526  * If we're serializable, the source transaction must be too, otherwise
1527  * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1528  * non-read-only transaction can't adopt a snapshot from a read-only
1529  * transaction, as predicate.c handles the cases very differently.
1530  */
1532  {
1533  if (src_isolevel != XACT_SERIALIZABLE)
1534  ereport(ERROR,
1535  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1536  errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1537  if (src_readonly && !XactReadOnly)
1538  ereport(ERROR,
1539  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1540  errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1541  }
1542 
1543  /*
1544  * We cannot import a snapshot that was taken in a different database,
1545  * because vacuum calculates OldestXmin on a per-database basis; so the
1546  * source transaction's xmin doesn't protect us from data loss. This
1547  * restriction could be removed if the source transaction were to mark its
1548  * xmin as being globally applicable. But that would require some
1549  * additional syntax, since that has to be known when the snapshot is
1550  * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1551  */
1552  if (src_dbid != MyDatabaseId)
1553  ereport(ERROR,
1554  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1555  errmsg("cannot import a snapshot from a different database")));
1556 
1557  /* OK, install the snapshot */
1558  SetTransactionSnapshot(&snapshot, src_xid, NULL);
1559 }
1560 
1561 /*
1562  * XactHasExportedSnapshots
1563  * Test whether current transaction has exported any snapshots.
1564  */
1565 bool
1567 {
1568  return (exportedSnapshots != NIL);
1569 }
1570 
1571 /*
1572  * DeleteAllExportedSnapshotFiles
1573  * Clean up any files that have been left behind by a crashed backend
1574  * that had exported snapshots before it died.
1575  *
1576  * This should be called during database startup or crash recovery.
1577  */
1578 void
1580 {
1581  char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1582  DIR *s_dir;
1583  struct dirent *s_de;
1584 
1585  if (!(s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR)))
1586  {
1587  /*
1588  * We really should have that directory in a sane cluster setup. But
1589  * then again if we don't, it's not fatal enough to make it FATAL.
1590  * Since we're running in the postmaster, LOG is our best bet.
1591  */
1592  elog(LOG, "could not open directory \"%s\": %m", SNAPSHOT_EXPORT_DIR);
1593  return;
1594  }
1595 
1596  while ((s_de = ReadDir(s_dir, SNAPSHOT_EXPORT_DIR)) != NULL)
1597  {
1598  if (strcmp(s_de->d_name, ".") == 0 ||
1599  strcmp(s_de->d_name, "..") == 0)
1600  continue;
1601 
1602  snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1603  /* Again, unlink failure is not worthy of FATAL */
1604  if (unlink(buf))
1605  elog(LOG, "could not unlink file \"%s\": %m", buf);
1606  }
1607 
1608  FreeDir(s_dir);
1609 }
1610 
1611 bool
1613 {
1614  if (pairingheap_is_empty(&RegisteredSnapshots) ||
1615  pairingheap_is_singular(&RegisteredSnapshots))
1616  return true;
1617 
1618  return false;
1619 }
1620 
1621 
1622 /*
1623  * Return a timestamp that is exactly on a minute boundary.
1624  *
1625  * If the argument is already aligned, return that value, otherwise move to
1626  * the next minute boundary following the given time.
1627  */
1628 static TimestampTz
1630 {
1631  TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
1632 
1633  return retval - (retval % USECS_PER_MINUTE);
1634 }
1635 
1636 /*
1637  * Get current timestamp for snapshots
1638  *
1639  * This is basically GetCurrentTimestamp(), but with a guarantee that
1640  * the result never moves backward.
1641  */
1644 {
1646 
1647  /*
1648  * Don't let time move backward; if it hasn't advanced, use the old value.
1649  */
1650  SpinLockAcquire(&oldSnapshotControl->mutex_current);
1651  if (now <= oldSnapshotControl->current_timestamp)
1652  now = oldSnapshotControl->current_timestamp;
1653  else
1654  oldSnapshotControl->current_timestamp = now;
1655  SpinLockRelease(&oldSnapshotControl->mutex_current);
1656 
1657  return now;
1658 }
1659 
1660 /*
1661  * Get timestamp through which vacuum may have processed based on last stored
1662  * value for threshold_timestamp.
1663  *
1664  * XXX: So far, we never trust that a 64-bit value can be read atomically; if
1665  * that ever changes, we could get rid of the spinlock here.
1666  */
1669 {
1670  TimestampTz threshold_timestamp;
1671 
1672  SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
1673  threshold_timestamp = oldSnapshotControl->threshold_timestamp;
1674  SpinLockRelease(&oldSnapshotControl->mutex_threshold);
1675 
1676  return threshold_timestamp;
1677 }
1678 
1679 static void
1681 {
1682  SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
1683  oldSnapshotControl->threshold_timestamp = ts;
1684  oldSnapshotControl->threshold_xid = xlimit;
1685  SpinLockRelease(&oldSnapshotControl->mutex_threshold);
1686 }
1687 
1688 /*
1689  * TransactionIdLimitedForOldSnapshots
1690  *
1691  * Apply old snapshot limit, if any. This is intended to be called for page
1692  * pruning and table vacuuming, to allow old_snapshot_threshold to override
1693  * the normal global xmin value. Actual testing for snapshot too old will be
1694  * based on whether a snapshot timestamp is prior to the threshold timestamp
1695  * set in this function.
1696  */
1699  Relation relation)
1700 {
1701  if (TransactionIdIsNormal(recentXmin)
1702  && old_snapshot_threshold >= 0
1703  && RelationAllowsEarlyPruning(relation))
1704  {
1706  TransactionId xlimit = recentXmin;
1707  TransactionId latest_xmin;
1708  TimestampTz update_ts;
1709  bool same_ts_as_threshold = false;
1710 
1711  SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
1712  latest_xmin = oldSnapshotControl->latest_xmin;
1713  update_ts = oldSnapshotControl->next_map_update;
1714  SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
1715 
1716  /*
1717  * Zero threshold always overrides to latest xmin, if valid. Without
1718  * some heuristic it will find its own snapshot too old on, for
1719  * example, a simple UPDATE -- which would make it useless for most
1720  * testing, but there is no principled way to ensure that it doesn't
1721  * fail in this way. Use a five-second delay to try to get useful
1722  * testing behavior, but this may need adjustment.
1723  */
1724  if (old_snapshot_threshold == 0)
1725  {
1726  if (TransactionIdPrecedes(latest_xmin, MyPgXact->xmin)
1727  && TransactionIdFollows(latest_xmin, xlimit))
1728  xlimit = latest_xmin;
1729 
1730  ts -= 5 * USECS_PER_SEC;
1732 
1733  return xlimit;
1734  }
1735 
1738 
1739  /* Check for fast exit without LW locking. */
1740  SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
1741  if (ts == oldSnapshotControl->threshold_timestamp)
1742  {
1743  xlimit = oldSnapshotControl->threshold_xid;
1744  same_ts_as_threshold = true;
1745  }
1746  SpinLockRelease(&oldSnapshotControl->mutex_threshold);
1747 
1748  if (!same_ts_as_threshold)
1749  {
1750  if (ts == update_ts)
1751  {
1752  xlimit = latest_xmin;
1753  if (NormalTransactionIdFollows(xlimit, recentXmin))
1755  }
1756  else
1757  {
1758  LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
1759 
1760  if (oldSnapshotControl->count_used > 0
1761  && ts >= oldSnapshotControl->head_timestamp)
1762  {
1763  int offset;
1764 
1765  offset = ((ts - oldSnapshotControl->head_timestamp)
1766  / USECS_PER_MINUTE);
1767  if (offset > oldSnapshotControl->count_used - 1)
1768  offset = oldSnapshotControl->count_used - 1;
1769  offset = (oldSnapshotControl->head_offset + offset)
1771  xlimit = oldSnapshotControl->xid_by_minute[offset];
1772 
1773  if (NormalTransactionIdFollows(xlimit, recentXmin))
1775  }
1776 
1777  LWLockRelease(OldSnapshotTimeMapLock);
1778  }
1779  }
1780 
1781  /*
1782  * Failsafe protection against vacuuming work of active transaction.
1783  *
1784  * This is not an assertion because we avoid the spinlock for
1785  * performance, leaving open the possibility that xlimit could advance
1786  * and be more current; but it seems prudent to apply this limit. It
1787  * might make pruning a tiny bit less aggressive than it could be, but
1788  * protects against data loss bugs.
1789  */
1790  if (TransactionIdIsNormal(latest_xmin)
1791  && TransactionIdPrecedes(latest_xmin, xlimit))
1792  xlimit = latest_xmin;
1793 
1794  if (NormalTransactionIdFollows(xlimit, recentXmin))
1795  return xlimit;
1796  }
1797 
1798  return recentXmin;
1799 }
1800 
1801 /*
1802  * Take care of the circular buffer that maps time to xid.
1803  */
1804 void
1806 {
1807  TimestampTz ts;
1808  TransactionId latest_xmin;
1809  TimestampTz update_ts;
1810  bool map_update_required = false;
1811 
1812  /* Never call this function when old snapshot checking is disabled. */
1814 
1815  ts = AlignTimestampToMinuteBoundary(whenTaken);
1816 
1817  /*
1818  * Keep track of the latest xmin seen by any process. Update mapping with
1819  * a new value when we have crossed a bucket boundary.
1820  */
1821  SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
1822  latest_xmin = oldSnapshotControl->latest_xmin;
1823  update_ts = oldSnapshotControl->next_map_update;
1824  if (ts > update_ts)
1825  {
1826  oldSnapshotControl->next_map_update = ts;
1827  map_update_required = true;
1828  }
1829  if (TransactionIdFollows(xmin, latest_xmin))
1830  oldSnapshotControl->latest_xmin = xmin;
1831  SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
1832 
1833  /* We only needed to update the most recent xmin value. */
1834  if (!map_update_required)
1835  return;
1836 
1837  /* No further tracking needed for 0 (used for testing). */
1838  if (old_snapshot_threshold == 0)
1839  return;
1840 
1841  /*
1842  * We don't want to do something stupid with unusual values, but we don't
1843  * want to litter the log with warnings or break otherwise normal
1844  * processing for this feature; so if something seems unreasonable, just
1845  * log at DEBUG level and return without doing anything.
1846  */
1847  if (whenTaken < 0)
1848  {
1849  elog(DEBUG1,
1850  "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
1851  (long) whenTaken);
1852  return;
1853  }
1854  if (!TransactionIdIsNormal(xmin))
1855  {
1856  elog(DEBUG1,
1857  "MaintainOldSnapshotTimeMapping called with xmin = %lu",
1858  (unsigned long) xmin);
1859  return;
1860  }
1861 
1862  LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
1863 
1864  Assert(oldSnapshotControl->head_offset >= 0);
1865  Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
1866  Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
1867  Assert(oldSnapshotControl->count_used >= 0);
1868  Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
1869 
1870  if (oldSnapshotControl->count_used == 0)
1871  {
1872  /* set up first entry for empty mapping */
1873  oldSnapshotControl->head_offset = 0;
1874  oldSnapshotControl->head_timestamp = ts;
1875  oldSnapshotControl->count_used = 1;
1876  oldSnapshotControl->xid_by_minute[0] = xmin;
1877  }
1878  else if (ts < oldSnapshotControl->head_timestamp)
1879  {
1880  /* old ts; log it at DEBUG */
1881  LWLockRelease(OldSnapshotTimeMapLock);
1882  elog(DEBUG1,
1883  "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
1884  (long) whenTaken);
1885  return;
1886  }
1887  else if (ts <= (oldSnapshotControl->head_timestamp +
1888  ((oldSnapshotControl->count_used - 1)
1889  * USECS_PER_MINUTE)))
1890  {
1891  /* existing mapping; advance xid if possible */
1892  int bucket = (oldSnapshotControl->head_offset
1893  + ((ts - oldSnapshotControl->head_timestamp)
1894  / USECS_PER_MINUTE))
1896 
1897  if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
1898  oldSnapshotControl->xid_by_minute[bucket] = xmin;
1899  }
1900  else
1901  {
1902  /* We need a new bucket, but it might not be the very next one. */
1903  int advance = ((ts - oldSnapshotControl->head_timestamp)
1904  / USECS_PER_MINUTE);
1905 
1906  oldSnapshotControl->head_timestamp = ts;
1907 
1908  if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
1909  {
1910  /* Advance is so far that all old data is junk; start over. */
1911  oldSnapshotControl->head_offset = 0;
1912  oldSnapshotControl->count_used = 1;
1913  oldSnapshotControl->xid_by_minute[0] = xmin;
1914  }
1915  else
1916  {
1917  /* Store the new value in one or more buckets. */
1918  int i;
1919 
1920  for (i = 0; i < advance; i++)
1921  {
1922  if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
1923  {
1924  /* Map full and new value replaces old head. */
1925  int old_head = oldSnapshotControl->head_offset;
1926 
1927  if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
1928  oldSnapshotControl->head_offset = 0;
1929  else
1930  oldSnapshotControl->head_offset = old_head + 1;
1931  oldSnapshotControl->xid_by_minute[old_head] = xmin;
1932  }
1933  else
1934  {
1935  /* Extend map to unused entry. */
1936  int new_tail = (oldSnapshotControl->head_offset
1937  + oldSnapshotControl->count_used)
1939 
1940  oldSnapshotControl->count_used++;
1941  oldSnapshotControl->xid_by_minute[new_tail] = xmin;
1942  }
1943  }
1944  }
1945  }
1946 
1947  LWLockRelease(OldSnapshotTimeMapLock);
1948 }
1949 
1950 
1951 /*
1952  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1953  * access to behave just like it did at a certain point in the past.
1954  *
1955  * Needed for logical decoding.
1956  */
1957 void
1958 SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1959 {
1960  Assert(historic_snapshot != NULL);
1961 
1962  /* setup the timetravel snapshot */
1963  HistoricSnapshot = historic_snapshot;
1964 
1965  /* setup (cmin, cmax) lookup hash */
1966  tuplecid_data = tuplecids;
1967 }
1968 
1969 
1970 /*
1971  * Make catalog snapshots behave normally again.
1972  */
1973 void
1975 {
1976  HistoricSnapshot = NULL;
1977  tuplecid_data = NULL;
1978 }
1979 
1980 bool
1982 {
1983  return HistoricSnapshot != NULL;
1984 }
1985 
1986 HTAB *
1988 {
1990  return tuplecid_data;
1991 }
1992 
1993 /*
1994  * EstimateSnapshotSpace
1995  * Returns the size needed to store the given snapshot.
1996  *
1997  * We are exporting only required fields from the Snapshot, stored in
1998  * SerializedSnapshotData.
1999  */
2000 Size
2002 {
2003  Size size;
2004 
2005  Assert(snap != InvalidSnapshot);
2007 
2008  /* We allocate any XID arrays needed in the same palloc block. */
2009  size = add_size(sizeof(SerializedSnapshotData),
2010  mul_size(snap->xcnt, sizeof(TransactionId)));
2011  if (snap->subxcnt > 0 &&
2012  (!snap->suboverflowed || snap->takenDuringRecovery))
2013  size = add_size(size,
2014  mul_size(snap->subxcnt, sizeof(TransactionId)));
2015 
2016  return size;
2017 }
2018 
2019 /*
2020  * SerializeSnapshot
2021  * Dumps the serialized snapshot (extracted from given snapshot) onto the
2022  * memory location at start_address.
2023  */
2024 void
2025 SerializeSnapshot(Snapshot snapshot, char *start_address)
2026 {
2027  SerializedSnapshotData serialized_snapshot;
2028 
2029  Assert(snapshot->subxcnt >= 0);
2030 
2031  /* Copy all required fields */
2032  serialized_snapshot.xmin = snapshot->xmin;
2033  serialized_snapshot.xmax = snapshot->xmax;
2034  serialized_snapshot.xcnt = snapshot->xcnt;
2035  serialized_snapshot.subxcnt = snapshot->subxcnt;
2036  serialized_snapshot.suboverflowed = snapshot->suboverflowed;
2037  serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
2038  serialized_snapshot.curcid = snapshot->curcid;
2039  serialized_snapshot.whenTaken = snapshot->whenTaken;
2040  serialized_snapshot.lsn = snapshot->lsn;
2041 
2042  /*
2043  * Ignore the SubXID array if it has overflowed, unless the snapshot was
2044  * taken during recovey - in that case, top-level XIDs are in subxip as
2045  * well, and we mustn't lose them.
2046  */
2047  if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
2048  serialized_snapshot.subxcnt = 0;
2049 
2050  /* Copy struct to possibly-unaligned buffer */
2051  memcpy(start_address,
2052  &serialized_snapshot, sizeof(SerializedSnapshotData));
2053 
2054  /* Copy XID array */
2055  if (snapshot->xcnt > 0)
2056  memcpy((TransactionId *) (start_address +
2057  sizeof(SerializedSnapshotData)),
2058  snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
2059 
2060  /*
2061  * Copy SubXID array. Don't bother to copy it if it had overflowed,
2062  * though, because it's not used anywhere in that case. Except if it's a
2063  * snapshot taken during recovery; all the top-level XIDs are in subxip as
2064  * well in that case, so we mustn't lose them.
2065  */
2066  if (serialized_snapshot.subxcnt > 0)
2067  {
2068  Size subxipoff = sizeof(SerializedSnapshotData) +
2069  snapshot->xcnt * sizeof(TransactionId);
2070 
2071  memcpy((TransactionId *) (start_address + subxipoff),
2072  snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
2073  }
2074 }
2075 
2076 /*
2077  * RestoreSnapshot
2078  * Restore a serialized snapshot from the specified address.
2079  *
2080  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
2081  * to 0. The returned snapshot has the copied flag set.
2082  */
2083 Snapshot
2084 RestoreSnapshot(char *start_address)
2085 {
2086  SerializedSnapshotData serialized_snapshot;
2087  Size size;
2088  Snapshot snapshot;
2089  TransactionId *serialized_xids;
2090 
2091  memcpy(&serialized_snapshot, start_address,
2092  sizeof(SerializedSnapshotData));
2093  serialized_xids = (TransactionId *)
2094  (start_address + sizeof(SerializedSnapshotData));
2095 
2096  /* We allocate any XID arrays needed in the same palloc block. */
2097  size = sizeof(SnapshotData)
2098  + serialized_snapshot.xcnt * sizeof(TransactionId)
2099  + serialized_snapshot.subxcnt * sizeof(TransactionId);
2100 
2101  /* Copy all required fields */
2103  snapshot->satisfies = HeapTupleSatisfiesMVCC;
2104  snapshot->xmin = serialized_snapshot.xmin;
2105  snapshot->xmax = serialized_snapshot.xmax;
2106  snapshot->xip = NULL;
2107  snapshot->xcnt = serialized_snapshot.xcnt;
2108  snapshot->subxip = NULL;
2109  snapshot->subxcnt = serialized_snapshot.subxcnt;
2110  snapshot->suboverflowed = serialized_snapshot.suboverflowed;
2111  snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
2112  snapshot->curcid = serialized_snapshot.curcid;
2113  snapshot->whenTaken = serialized_snapshot.whenTaken;
2114  snapshot->lsn = serialized_snapshot.lsn;
2115 
2116  /* Copy XIDs, if present. */
2117  if (serialized_snapshot.xcnt > 0)
2118  {
2119  snapshot->xip = (TransactionId *) (snapshot + 1);
2120  memcpy(snapshot->xip, serialized_xids,
2121  serialized_snapshot.xcnt * sizeof(TransactionId));
2122  }
2123 
2124  /* Copy SubXIDs, if present. */
2125  if (serialized_snapshot.subxcnt > 0)
2126  {
2127  snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
2128  serialized_snapshot.xcnt;
2129  memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
2130  serialized_snapshot.subxcnt * sizeof(TransactionId));
2131  }
2132 
2133  /* Set the copied flag so that the caller will set refcounts correctly. */
2134  snapshot->regd_count = 0;
2135  snapshot->active_count = 0;
2136  snapshot->copied = true;
2137 
2138  return snapshot;
2139 }
2140 
2141 /*
2142  * Install a restored snapshot as the transaction snapshot.
2143  *
2144  * The second argument is of type void * so that snapmgr.h need not include
2145  * the declaration for PGPROC.
2146  */
2147 void
2148 RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc)
2149 {
2150  SetTransactionSnapshot(snapshot, InvalidTransactionId, master_pgproc);
2151 }
void ImportSnapshot(const char *idstr)
Definition: snapmgr.c:1395
int slock_t
Definition: s_lock.h:888
#define NIL
Definition: pg_list.h:69
uint32 CommandId
Definition: c.h:411
int xactGetCommittedChildren(TransactionId **ptr)
Definition: xact.c:5084
bool ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid)
Definition: procarray.c:1796
SnapshotSatisfiesFunc satisfies
Definition: snapshot.h:54
pairingheap_node * pairingheap_first(pairingheap *heap)
Definition: pairingheap.c:130
Snapshot as_snap
Definition: snapmgr.c:182
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void UpdateActiveSnapshotCommandId(void)
Definition: snapmgr.c:776
SnapshotData CatalogSnapshotData
Definition: snapmgr.c:146
bool RelationHasSysCache(Oid relid)
Definition: syscache.c:1376
static TransactionId parseXidFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1364
#define DEBUG1
Definition: elog.h:25
TimestampTz GetOldSnapshotThresholdTimestamp(void)
Definition: snapmgr.c:1668
bool XactHasExportedSnapshots(void)
Definition: snapmgr.c:1566
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2084
static int32 next
Definition: blutils.c:210
MemoryContext TopTransactionContext
Definition: mcxt.c:48
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
uint32 TransactionId
Definition: c.h:397
bool copied
Definition: snapshot.h:94
static int parseIntFromText(const char *prefix, char **s, const char *filename)
Definition: snapmgr.c:1339
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:858
#define pairingheap_reset(h)
Definition: pairingheap.h:93
static void FreeSnapshot(Snapshot snapshot)
Definition: snapmgr.c:710
#define USECS_PER_SEC
Definition: timestamp.h:94
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
TransactionId xmin
Definition: proc.h:213
int64 TimestampTz
Definition: timestamp.h:39
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void SetSerializableTransactionSnapshot(Snapshot snapshot, TransactionId sourcexid)
Definition: predicate.c:1660
char * pstrdup(const char *in)
Definition: mcxt.c:1077
slock_t mutex_threshold
Definition: snapmgr.c:91
#define SpinLockInit(lock)
Definition: spin.h:60
static void SnapshotResetXmin(void)
Definition: snapmgr.c:973
void RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc)
Definition: snapmgr.c:2148
#define RelationAllowsEarlyPruning(rel)
Definition: snapmgr.h:38
void ResourceOwnerEnlargeSnapshots(ResourceOwner owner)
Definition: resowner.c:1143
TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation)
Definition: snapmgr.c:1698
Snapshot GetCatalogSnapshot(Oid relid)
Definition: snapmgr.c:436
XLogRecPtr lsn
Definition: snapshot.h:112
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IsolationUsesXactSnapshot()
Definition: xact.h:43
static Snapshot HistoricSnapshot
Definition: snapmgr.c:152
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:834
#define pairingheap_is_empty(h)
Definition: pairingheap.h:96
bool ThereAreNoPriorRegisteredSnapshots(void)
Definition: snapmgr.c:1612
#define USECS_PER_MINUTE
Definition: timestamp.h:93
int errcode(int sqlerrcode)
Definition: elog.c:575
TransactionId RecentXmin
Definition: snapmgr.c:165
#define PG_BINARY_W
Definition: c.h:1041
TimestampTz whenTaken
Definition: snapmgr.c:242
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1159
bool suboverflowed
Definition: snapshot.h:91
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void PopActiveSnapshot(void)
Definition: snapmgr.c:807
void TeardownHistoricSnapshot(bool is_error)
Definition: snapmgr.c:1974
static SnapshotData CurrentSnapshotData
Definition: snapmgr.c:144
static Snapshot FirstXactSnapshot
Definition: snapmgr.c:210
void AtSubCommit_Snapshot(int level)
Definition: snapmgr.c:997
TransactionId GetTopTransactionId(void)
Definition: xact.c:389
#define LOG
Definition: elog.h:26
struct SnapshotData * Snapshot
Definition: snapshot.h:23
unsigned int Oid
Definition: postgres_ext.h:31
Definition: dirent.h:9
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:300
uint32 regd_count
Definition: snapshot.h:108
#define OidIsValid(objectId)
Definition: c.h:538
#define PG_BINARY_R
Definition: c.h:1040
static Snapshot CurrentSnapshot
Definition: snapmgr.c:149
Snapshot GetNonHistoricCatalogSnapshot(Oid relid)
Definition: snapmgr.c:458
signed int int32
Definition: c.h:256
#define XACT_SERIALIZABLE
Definition: xact.h:31
PGXACT * MyPgXact
Definition: proc.c:68
TransactionId TransactionXmin
Definition: snapmgr.c:164
static List * exportedSnapshots
Definition: snapmgr.c:219
#define SNAPSHOT_EXPORT_DIR
Definition: snapmgr.c:213
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
struct ActiveSnapshotElt ActiveSnapshotElt
#define pairingheap_container(type, membername, ptr)
Definition: pairingheap.h:43
void AtSubAbort_Snapshot(int level)
Definition: snapmgr.c:1018
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:193
void pfree(void *pointer)
Definition: mcxt.c:950
bool IsInParallelMode(void)
Definition: xact.c:913
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:110
TimestampTz threshold_timestamp
Definition: snapmgr.c:92
Definition: dirent.c:25
#define FirstNormalTransactionId
Definition: transam.h:34
static Snapshot CatalogSnapshot
Definition: snapmgr.c:151
#define ERROR
Definition: elog.h:43
TimestampTz GetSnapshotCurrentTimestamp(void)
Definition: snapmgr.c:1643
static pairingheap RegisteredSnapshots
Definition: snapmgr.c:200
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2025
bool FirstSnapshotSet
Definition: snapmgr.c:203
slock_t mutex_latest_xmin
Definition: snapmgr.c:87
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
TimestampTz next_map_update
Definition: snapmgr.c:90
static ActiveSnapshotElt * OldestActiveSnapshot
Definition: snapmgr.c:191
#define MAXPGPATH
void PushCopiedSnapshot(Snapshot snapshot)
Definition: snapmgr.c:764
TimestampTz current_timestamp
Definition: snapmgr.c:86
TransactionId threshold_xid
Definition: snapmgr.c:93
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:189
struct SerializedSnapshotData SerializedSnapshotData
static void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1680
static char * buf
Definition: pg_test_fsync.c:66
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:728
TransactionId RecentGlobalXmin
Definition: snapmgr.c:166
void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1163
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
struct SnapshotData SnapshotData
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2094
#define InvalidTransactionId
Definition: transam.h:31
struct ActiveSnapshotElt * as_next
Definition: snapmgr.c:184
Datum pg_export_snapshot(PG_FUNCTION_ARGS)
Definition: snapmgr.c:1324
unsigned int uint32
Definition: c.h:268
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2335
bool ActiveSnapshotSet(void)
Definition: snapmgr.c:846
TransactionId xmax
Definition: snapshot.h:67
TransactionId xmin
Definition: snapshot.h:66
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:404
static volatile OldSnapshotControlData * oldSnapshotControl
Definition: snapmgr.c:129
TransactionId RecentGlobalDataXmin
Definition: snapmgr.c:167
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
void SnapMgrInit(void)
Definition: snapmgr.c:263
#define pairingheap_const_container(type, membername, ptr)
Definition: pairingheap.h:51
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:506
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:77
static ActiveSnapshotElt * ActiveSnapshot
Definition: snapmgr.c:188
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:900
List * lappend(List *list, void *datum)
Definition: list.c:128
static HTAB * tuplecid_data
Definition: snapmgr.c:170
void initStringInfo(StringInfo str)
Definition: stringinfo.c:65
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define WARNING
Definition: elog.h:40
TransactionId xmax
Definition: snapmgr.c:236
#define InvalidSnapshot
Definition: snapshot.h:25
#define SpinLockRelease(lock)
Definition: spin.h:64
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2001
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
uintptr_t Datum
Definition: postgres.h:372
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
Oid MyDatabaseId
Definition: globals.c:76
Snapshot GetOldestSnapshot(void)
Definition: snapmgr.c:407
static SnapshotData SecondarySnapshotData
Definition: snapmgr.c:145
CommandId curcid
Definition: snapshot.h:96
static Snapshot CopySnapshot(Snapshot snapshot)
Definition: snapmgr.c:655
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1455
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:871
bool XactReadOnly
Definition: xact.c:77
pairingheap_node ph_node
Definition: snapshot.h:109
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:761
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
TransactionId latest_xmin
Definition: snapmgr.c:89
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
bool RelationInvalidatesSnapshotsOnly(Oid relid)
Definition: syscache.c:1353
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition: predicate.c:1620
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2401
void InvalidateCatalogSnapshotConditionally(void)
Definition: snapmgr.c:527
TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER]
Definition: snapmgr.c:126
bool takenDuringRecovery
Definition: snapshot.h:93
#define NormalTransactionIdFollows(id1, id2)
Definition: transam.h:67
size_t Size
Definition: c.h:356
Snapshot GetSnapshotData(Snapshot snapshot)
Definition: procarray.c:1508
static int list_length(const List *l)
Definition: pg_list.h:89
int XactIsoLevel
Definition: xact.c:74
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:913
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
slock_t mutex_current
Definition: snapmgr.c:85
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts)
Definition: snapmgr.c:1629
bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
Definition: procarray.c:1868
Snapshot GetLatestSnapshot(void)
Definition: snapmgr.c:375
void SnapshotSetCommandId(CommandId curcid)
Definition: snapmgr.c:540
int FreeFile(FILE *file)
Definition: fd.c:2277
bool IsSubTransaction(void)
Definition: xact.c:4376
HTAB * HistoricSnapshotGetTupleCids(void)
Definition: snapmgr.c:1987
static char * filename
Definition: pg_dumpall.c:87
static void SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid, PGPROC *sourceproc)
Definition: snapmgr.c:561
uint32 xcnt
Definition: snapshot.h:78
void * palloc(Size size)
Definition: mcxt.c:849
bool HistoricSnapshotActive(void)
Definition: snapmgr.c:1981
int errmsg(const char *fmt,...)
Definition: elog.c:797
void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snapshot)
Definition: resowner.c:1154
#define IsolationIsSerializable()
Definition: xact.h:44
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1579
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition: snapmgr.c:1958
static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
Definition: snapmgr.c:939
int old_snapshot_threshold
Definition: snapmgr.c:74
TimestampTz head_timestamp
Definition: snapmgr.c:124
int i
TransactionId xmin
Definition: snapmgr.c:235
static Snapshot SecondarySnapshot
Definition: snapmgr.c:150
void * arg
struct OldSnapshotControlData OldSnapshotControlData
int GetMaxSnapshotSubxidCount(void)
Definition: procarray.c:1466
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
TimestampTz whenTaken
Definition: snapshot.h:111
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:170
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
Definition: pairingheap.c:112
char d_name[MAX_PATH]
Definition: dirent.h:14
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:687
#define elog
Definition: elog.h:219
void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
Definition: snapmgr.c:1805
#define XactExportFilePath(path, xid, num, suffix)
Definition: snapmgr.c:214
#define OLD_SNAPSHOT_TIME_MAP_ENTRIES
Definition: snapmgr.h:32
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
Definition: tqual.c:963
Size SnapMgrShmemSize(void)
Definition: snapmgr.c:247
Definition: proc.h:94
Definition: pg_list.h:45
long val
Definition: informix.c:689
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1534
int FreeDir(DIR *dir)
Definition: fd.c:2444
#define offsetof(type, field)
Definition: c.h:555
void AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Definition: snapmgr.c:1054
TransactionId * subxip
Definition: snapshot.h:89
#define pairingheap_is_singular(h)
Definition: pairingheap.h:99
uint32 active_count
Definition: snapshot.h:107
int32 subxcnt
Definition: snapshot.h:90