PostgreSQL Source Code  git master
snapbuild.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapbuild.c
4  *
5  * Infrastructure for building historic catalog snapshots based on contents
6  * of the WAL, for the purpose of decoding heapam.c style values in the
7  * WAL.
8  *
9  * NOTES:
10  *
11  * We build snapshots which can *only* be used to read catalog contents and we
12  * do so by reading and interpreting the WAL stream. The aim is to build a
13  * snapshot that behaves the same as a freshly taken MVCC snapshot would have
14  * at the time the XLogRecord was generated.
15  *
16  * To build the snapshots we reuse the infrastructure built for Hot
17  * Standby. The in-memory snapshots we build look different than HS' because
18  * we have different needs. To successfully decode data from the WAL we only
19  * need to access catalog tables and (sys|rel|cat)cache, not the actual user
20  * tables since the data we decode is wholly contained in the WAL
21  * records. Also, our snapshots need to be different in comparison to normal
22  * MVCC ones because in contrast to those we cannot fully rely on the clog and
23  * pg_subtrans for information about committed transactions because they might
24  * commit in the future from the POV of the WAL entry we're currently
25  * decoding. This definition has the advantage that we only need to prevent
26  * removal of catalog rows, while normal table's rows can still be
27  * removed. This is achieved by using the replication slot mechanism.
28  *
29  * As the percentage of transactions modifying the catalog normally is fairly
30  * small in comparison to ones only manipulating user data, we keep track of
31  * the committed catalog modifying ones inside [xmin, xmax) instead of keeping
32  * track of all running transactions like it's done in a normal snapshot. Note
33  * that we're generally only looking at transactions that have acquired an
34  * xid. That is we keep a list of transactions between snapshot->(xmin, xmax)
35  * that we consider committed, everything else is considered aborted/in
36  * progress. That also allows us not to care about subtransactions before they
37  * have committed which means this module, in contrast to HS, doesn't have to
38  * care about suboverflowed subtransactions and similar.
39  *
40  * One complexity of doing this is that to e.g. handle mixed DDL/DML
41  * transactions we need Snapshots that see intermediate versions of the
42  * catalog in a transaction. During normal operation this is achieved by using
43  * CommandIds/cmin/cmax. The problem with that however is that for space
44  * efficiency reasons only one value of that is stored
45  * (cf. combocid.c). Since combo CIDs are only available in memory we log
46  * additional information which allows us to get the original (cmin, cmax)
47  * pair during visibility checks. Check the reorderbuffer.c's comment above
48  * ResolveCminCmaxDuringDecoding() for details.
49  *
50  * To facilitate all this we need our own visibility routine, as the normal
51  * ones are optimized for different usecases.
52  *
53  * To replace the normal catalog snapshots with decoding ones use the
54  * SetupHistoricSnapshot() and TeardownHistoricSnapshot() functions.
55  *
56  *
57  *
58  * The snapbuild machinery is starting up in several stages, as illustrated
59  * by the following graph describing the SnapBuild->state transitions:
60  *
61  *                         +-------------------------+
62  *                    +----|         START           |-------------+
63  *                    |    +-------------------------+             |
64  *                    |                 |                          |
65  *                    |                 |                          |
66  *                    |         running_xacts #1                   |
67  *                    |                 |                          |
68  *                    |                 |                          |
69  *                    |                 v                          |
70  *                    |    +-------------------------+             v
71  *                    |    |   BUILDING_SNAPSHOT     |------------>|
72  *                    |    +-------------------------+             |
73  *                    |                 |                          |
74  *                    |                 |                          |
75  *                    | running_xacts #2, xacts from #1 finished   |
76  *                    |                 |                          |
77  *                    |                 |                          |
78  *                    |                 v                          |
79  *                    |    +-------------------------+             v
80  *                    |    |      FULL_SNAPSHOT      |------------>|
81  *                    |    +-------------------------+             |
82  *                    |                 |                          |
83  *              running_xacts           |                   saved snapshot
84  *             with zero xacts          |               at running_xacts's lsn
85  *                    |                 |                          |
86  *                    | running_xacts with xacts from #2 finished  |
87  *                    |                 |                          |
88  *                    |                 v                          |
89  *                    |    +-------------------------+             |
90  *                    +--->|SNAPBUILD_CONSISTENT     |<------------+
91  *                         +-------------------------+
92  *
93  * Initially the machinery is in the START stage. When an xl_running_xacts
94  * record is read that is sufficiently new (above the safe xmin horizon),
95  * there's a state transition. If there were no running xacts when the
96  * running_xacts record was generated, we'll directly go into CONSISTENT
97  * state, otherwise we'll switch to the BUILDING_SNAPSHOT state. Having a full
98  * snapshot means that all transactions that start henceforth can be decoded
99  * in their entirety, but transactions that started previously can't. In
100  * FULL_SNAPSHOT we'll switch into CONSISTENT once all those previously
101  * running transactions have committed or aborted.
102  *
103  * Only transactions that commit after CONSISTENT state has been reached will
104  * be replayed, even though they might have started while still in
105  * FULL_SNAPSHOT. That ensures that we'll reach a point where no previous
106  * changes have been exported, but all the following ones will be. That point
107  * is a convenient point to initialize replication from, which is why we
108  * export a snapshot at that point, which *can* be used to read normal data.
109  *
110  * Copyright (c) 2012-2022, PostgreSQL Global Development Group
111  *
112  * IDENTIFICATION
113  * src/backend/replication/logical/snapbuild.c
114  *
115  *-------------------------------------------------------------------------
116  */
117 
118 #include "postgres.h"
119 
120 #include <sys/stat.h>
121 #include <unistd.h>
122 
123 #include "access/heapam_xlog.h"
124 #include "access/transam.h"
125 #include "access/xact.h"
126 #include "miscadmin.h"
127 #include "pgstat.h"
128 #include "replication/logical.h"
130 #include "replication/snapbuild.h"
131 #include "storage/block.h" /* debugging output */
132 #include "storage/fd.h"
133 #include "storage/lmgr.h"
134 #include "storage/proc.h"
135 #include "storage/procarray.h"
136 #include "storage/standby.h"
137 #include "utils/builtins.h"
138 #include "utils/memutils.h"
139 #include "utils/snapmgr.h"
140 #include "utils/snapshot.h"
141 
142 /*
143  * This struct contains the current state of the snapshot building
144  * machinery. Besides a forward declaration in the header, it is not exposed
145  * to the public, so we can easily change its contents.
146  */
147 struct SnapBuild
148 {
149  /* how far are we along building our first full snapshot */
151 
152  /* private memory context used to allocate memory for this module. */
154 
155  /* all transactions < than this have committed/aborted */
157 
158  /* all transactions >= than this are uncommitted */
160 
161  /*
162  * Don't replay commits from an LSN < this LSN. This can be set externally
163  * but it will also be advanced (never retreat) from within snapbuild.c.
164  */
166 
167  /*
168  * LSN at which two-phase decoding was enabled or LSN at which we found a
169  * consistent point at the time of slot creation.
170  *
171  * The prepared transactions, that were skipped because previously
172  * two-phase was not enabled or are not covered by initial snapshot, need
173  * to be sent later along with commit prepared and they must be before
174  * this point.
175  */
177 
178  /*
179  * Don't start decoding WAL until the "xl_running_xacts" information
180  * indicates there are no running xids with an xid smaller than this.
181  */
183 
184  /* Indicates if we are building full snapshot or just catalog one. */
186 
187  /*
188  * Snapshot that's valid to see the catalog state seen at this moment.
189  */
191 
192  /*
193  * LSN of the last location we are sure a snapshot has been serialized to.
194  */
196 
197  /*
198  * The reorderbuffer we need to update with usable snapshots et al.
199  */
201 
202  /*
203  * TransactionId at which the next phase of initial snapshot building will
204  * happen. InvalidTransactionId if not known (i.e. SNAPBUILD_START), or
205  * when no next phase necessary (SNAPBUILD_CONSISTENT).
206  */
208 
209  /*
210  * Array of transactions which could have catalog changes that committed
211  * between xmin and xmax.
212  */
213  struct
214  {
215  /* number of committed transactions */
216  size_t xcnt;
217 
218  /* available space for committed transactions */
219  size_t xcnt_space;
220 
221  /*
222  * Until we reach a CONSISTENT state, we record commits of all
223  * transactions, not just the catalog changing ones. Record when that
224  * changes so we know we cannot export a snapshot safely anymore.
225  */
227 
228  /*
229  * Array of committed transactions that have modified the catalog.
230  *
231  * As this array is frequently modified we do *not* keep it in
232  * xidComparator order. Instead we sort the array when building &
233  * distributing a snapshot.
234  *
235  * TODO: It's unclear whether that reasoning has much merit. Every
236  * time we add something here after becoming consistent will also
237  * require distributing a snapshot. Storing them sorted would
238  * potentially also make it easier to purge (but more complicated wrt
239  * wraparound?). Should be improved if sorting while building the
240  * snapshot shows up in profiles.
241  */
244 };
245 
246 /*
247  * Starting a transaction -- which we need to do while exporting a snapshot --
248  * removes knowledge about the previously used resowner, so we save it here.
249  */
251 static bool ExportInProgress = false;
252 
253 /* ->committed manipulation */
254 static void SnapBuildPurgeCommittedTxn(SnapBuild *builder);
255 
256 /* snapshot building/manipulation/distribution functions */
257 static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder);
258 
259 static void SnapBuildFreeSnapshot(Snapshot snap);
260 
261 static void SnapBuildSnapIncRefcount(Snapshot snap);
262 
264 
265 /* xlog reading helper functions for SnapBuildProcessRunningXacts */
266 static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
267 static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff);
268 
269 /* serialization functions */
270 static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
271 static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn);
272 
273 /*
274  * Allocate a new snapshot builder.
275  *
276  * xmin_horizon is the xid >= which we can be sure no catalog rows have been
277  * removed, start_lsn is the LSN >= we want to replay commits.
278  */
279 SnapBuild *
281  TransactionId xmin_horizon,
282  XLogRecPtr start_lsn,
283  bool need_full_snapshot,
284  XLogRecPtr two_phase_at)
285 {
286  MemoryContext context;
287  MemoryContext oldcontext;
288  SnapBuild *builder;
289 
290  /* allocate memory in own context, to have better accountability */
292  "snapshot builder context",
294  oldcontext = MemoryContextSwitchTo(context);
295 
296  builder = palloc0(sizeof(SnapBuild));
297 
298  builder->state = SNAPBUILD_START;
299  builder->context = context;
300  builder->reorder = reorder;
301  /* Other struct members initialized by zeroing via palloc0 above */
302 
303  builder->committed.xcnt = 0;
304  builder->committed.xcnt_space = 128; /* arbitrary number */
305  builder->committed.xip =
306  palloc0(builder->committed.xcnt_space * sizeof(TransactionId));
307  builder->committed.includes_all_transactions = true;
308 
309  builder->initial_xmin_horizon = xmin_horizon;
310  builder->start_decoding_at = start_lsn;
311  builder->building_full_snapshot = need_full_snapshot;
312  builder->two_phase_at = two_phase_at;
313 
314  MemoryContextSwitchTo(oldcontext);
315 
316  return builder;
317 }
318 
319 /*
320  * Free a snapshot builder.
321  */
322 void
324 {
325  MemoryContext context = builder->context;
326 
327  /* free snapshot explicitly, that contains some error checking */
328  if (builder->snapshot != NULL)
329  {
331  builder->snapshot = NULL;
332  }
333 
334  /* other resources are deallocated via memory context reset */
335  MemoryContextDelete(context);
336 }
337 
338 /*
339  * Free an unreferenced snapshot that has previously been built by us.
340  */
341 static void
343 {
344  /* make sure we don't get passed an external snapshot */
346 
347  /* make sure nobody modified our snapshot */
348  Assert(snap->curcid == FirstCommandId);
349  Assert(!snap->suboverflowed);
350  Assert(!snap->takenDuringRecovery);
351  Assert(snap->regd_count == 0);
352 
353  /* slightly more likely, so it's checked even without c-asserts */
354  if (snap->copied)
355  elog(ERROR, "cannot free a copied snapshot");
356 
357  if (snap->active_count)
358  elog(ERROR, "cannot free an active snapshot");
359 
360  pfree(snap);
361 }
362 
363 /*
364  * In which state of snapshot building are we?
365  */
368 {
369  return builder->state;
370 }
371 
372 /*
373  * Return the LSN at which the two-phase decoding was first enabled.
374  */
377 {
378  return builder->two_phase_at;
379 }
380 
381 /*
382  * Set the LSN at which two-phase decoding is enabled.
383  */
384 void
386 {
387  builder->two_phase_at = ptr;
388 }
389 
390 /*
391  * Should the contents of transaction ending at 'ptr' be decoded?
392  */
393 bool
395 {
396  return ptr < builder->start_decoding_at;
397 }
398 
399 /*
400  * Increase refcount of a snapshot.
401  *
402  * This is used when handing out a snapshot to some external resource or when
403  * adding a Snapshot as builder->snapshot.
404  */
405 static void
407 {
408  snap->active_count++;
409 }
410 
411 /*
412  * Decrease refcount of a snapshot and free if the refcount reaches zero.
413  *
414  * Externally visible, so that external resources that have been handed an
415  * IncRef'ed Snapshot can adjust its refcount easily.
416  */
417 void
419 {
420  /* make sure we don't get passed an external snapshot */
422 
423  /* make sure nobody modified our snapshot */
424  Assert(snap->curcid == FirstCommandId);
425  Assert(!snap->suboverflowed);
426  Assert(!snap->takenDuringRecovery);
427 
428  Assert(snap->regd_count == 0);
429 
430  Assert(snap->active_count > 0);
431 
432  /* slightly more likely, so it's checked even without casserts */
433  if (snap->copied)
434  elog(ERROR, "cannot free a copied snapshot");
435 
436  snap->active_count--;
437  if (snap->active_count == 0)
438  SnapBuildFreeSnapshot(snap);
439 }
440 
441 /*
442  * Build a new snapshot, based on currently committed catalog-modifying
443  * transactions.
444  *
445  * In-progress transactions with catalog access are *not* allowed to modify
446  * these snapshots; they have to copy them and fill in appropriate ->curcid
447  * and ->subxip/subxcnt values.
448  */
449 static Snapshot
451 {
452  Snapshot snapshot;
453  Size ssize;
454 
455  Assert(builder->state >= SNAPBUILD_FULL_SNAPSHOT);
456 
457  ssize = sizeof(SnapshotData)
458  + sizeof(TransactionId) * builder->committed.xcnt
459  + sizeof(TransactionId) * 1 /* toplevel xid */ ;
460 
461  snapshot = MemoryContextAllocZero(builder->context, ssize);
462 
464 
465  /*
466  * We misuse the original meaning of SnapshotData's xip and subxip fields
467  * to make the more fitting for our needs.
468  *
469  * In the 'xip' array we store transactions that have to be treated as
470  * committed. Since we will only ever look at tuples from transactions
471  * that have modified the catalog it's more efficient to store those few
472  * that exist between xmin and xmax (frequently there are none).
473  *
474  * Snapshots that are used in transactions that have modified the catalog
475  * also use the 'subxip' array to store their toplevel xid and all the
476  * subtransaction xids so we can recognize when we need to treat rows as
477  * visible that are not in xip but still need to be visible. Subxip only
478  * gets filled when the transaction is copied into the context of a
479  * catalog modifying transaction since we otherwise share a snapshot
480  * between transactions. As long as a txn hasn't modified the catalog it
481  * doesn't need to treat any uncommitted rows as visible, so there is no
482  * need for those xids.
483  *
484  * Both arrays are qsort'ed so that we can use bsearch() on them.
485  */
486  Assert(TransactionIdIsNormal(builder->xmin));
487  Assert(TransactionIdIsNormal(builder->xmax));
488 
489  snapshot->xmin = builder->xmin;
490  snapshot->xmax = builder->xmax;
491 
492  /* store all transactions to be treated as committed by this snapshot */
493  snapshot->xip =
494  (TransactionId *) ((char *) snapshot + sizeof(SnapshotData));
495  snapshot->xcnt = builder->committed.xcnt;
496  memcpy(snapshot->xip,
497  builder->committed.xip,
498  builder->committed.xcnt * sizeof(TransactionId));
499 
500  /* sort so we can bsearch() */
501  qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator);
502 
503  /*
504  * Initially, subxip is empty, i.e. it's a snapshot to be used by
505  * transactions that don't modify the catalog. Will be filled by
506  * ReorderBufferCopySnap() if necessary.
507  */
508  snapshot->subxcnt = 0;
509  snapshot->subxip = NULL;
510 
511  snapshot->suboverflowed = false;
512  snapshot->takenDuringRecovery = false;
513  snapshot->copied = false;
514  snapshot->curcid = FirstCommandId;
515  snapshot->active_count = 0;
516  snapshot->regd_count = 0;
517  snapshot->snapXactCompletionCount = 0;
518 
519  return snapshot;
520 }
521 
522 /*
523  * Build the initial slot snapshot and convert it to a normal snapshot that
524  * is understood by HeapTupleSatisfiesMVCC.
525  *
526  * The snapshot will be usable directly in current transaction or exported
527  * for loading in different transaction.
528  */
529 Snapshot
531 {
532  Snapshot snap;
533  TransactionId xid;
534  TransactionId *newxip;
535  int newxcnt = 0;
536 
539 
540  if (builder->state != SNAPBUILD_CONSISTENT)
541  elog(ERROR, "cannot build an initial slot snapshot before reaching a consistent state");
542 
543  if (!builder->committed.includes_all_transactions)
544  elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore");
545 
546  /* so we don't overwrite the existing value */
548  elog(ERROR, "cannot build an initial slot snapshot when MyProc->xmin already is valid");
549 
550  snap = SnapBuildBuildSnapshot(builder);
551 
552  /*
553  * We know that snap->xmin is alive, enforced by the logical xmin
554  * mechanism. Due to that we can do this without locks, we're only
555  * changing our own value.
556  */
557 #ifdef USE_ASSERT_CHECKING
558  {
559  TransactionId safeXid;
560 
561  LWLockAcquire(ProcArrayLock, LW_SHARED);
562  safeXid = GetOldestSafeDecodingTransactionId(false);
563  LWLockRelease(ProcArrayLock);
564 
565  Assert(TransactionIdPrecedesOrEquals(safeXid, snap->xmin));
566  }
567 #endif
568 
569  MyProc->xmin = snap->xmin;
570 
571  /* allocate in transaction context */
572  newxip = (TransactionId *)
574 
575  /*
576  * snapbuild.c builds transactions in an "inverted" manner, which means it
577  * stores committed transactions in ->xip, not ones in progress. Build a
578  * classical snapshot by marking all non-committed transactions as
579  * in-progress. This can be expensive.
580  */
581  for (xid = snap->xmin; NormalTransactionIdPrecedes(xid, snap->xmax);)
582  {
583  void *test;
584 
585  /*
586  * Check whether transaction committed using the decoding snapshot
587  * meaning of ->xip.
588  */
589  test = bsearch(&xid, snap->xip, snap->xcnt,
590  sizeof(TransactionId), xidComparator);
591 
592  if (test == NULL)
593  {
594  if (newxcnt >= GetMaxSnapshotXidCount())
595  ereport(ERROR,
597  errmsg("initial slot snapshot too large")));
598 
599  newxip[newxcnt++] = xid;
600  }
601 
603  }
604 
605  /* adjust remaining snapshot fields as needed */
607  snap->xcnt = newxcnt;
608  snap->xip = newxip;
609 
610  return snap;
611 }
612 
613 /*
614  * Export a snapshot so it can be set in another session with SET TRANSACTION
615  * SNAPSHOT.
616  *
617  * For that we need to start a transaction in the current backend as the
618  * importing side checks whether the source transaction is still open to make
619  * sure the xmin horizon hasn't advanced since then.
620  */
621 const char *
623 {
624  Snapshot snap;
625  char *snapname;
626 
628  elog(ERROR, "cannot export a snapshot from within a transaction");
629 
631  elog(ERROR, "can only export one snapshot at a time");
632 
634  ExportInProgress = true;
635 
637 
638  /* There doesn't seem to a nice API to set these */
640  XactReadOnly = true;
641 
642  snap = SnapBuildInitialSnapshot(builder);
643 
644  /*
645  * now that we've built a plain snapshot, make it active and use the
646  * normal mechanisms for exporting it
647  */
648  snapname = ExportSnapshot(snap);
649 
650  ereport(LOG,
651  (errmsg_plural("exported logical decoding snapshot: \"%s\" with %u transaction ID",
652  "exported logical decoding snapshot: \"%s\" with %u transaction IDs",
653  snap->xcnt,
654  snapname, snap->xcnt)));
655  return snapname;
656 }
657 
658 /*
659  * Ensure there is a snapshot and if not build one for current transaction.
660  */
661 Snapshot
663 {
664  Assert(builder->state == SNAPBUILD_CONSISTENT);
665 
666  /* only build a new snapshot if we don't have a prebuilt one */
667  if (builder->snapshot == NULL)
668  {
669  builder->snapshot = SnapBuildBuildSnapshot(builder);
670  /* increase refcount for the snapshot builder */
672  }
673 
674  return builder->snapshot;
675 }
676 
677 /*
678  * Reset a previously SnapBuildExportSnapshot()'ed snapshot if there is
679  * any. Aborts the previously started transaction and resets the resource
680  * owner back to its original value.
681  */
682 void
684 {
685  ResourceOwner tmpResOwner;
686 
687  /* nothing exported, that is the usual case */
688  if (!ExportInProgress)
689  return;
690 
691  if (!IsTransactionState())
692  elog(ERROR, "clearing exported snapshot in wrong transaction state");
693 
694  /*
695  * AbortCurrentTransaction() takes care of resetting the snapshot state,
696  * so remember SavedResourceOwnerDuringExport.
697  */
698  tmpResOwner = SavedResourceOwnerDuringExport;
699 
700  /* make sure nothing could have ever happened */
702 
703  CurrentResourceOwner = tmpResOwner;
704 }
705 
706 /*
707  * Clear snapshot export state during transaction abort.
708  */
709 void
711 {
713  ExportInProgress = false;
714 }
715 
716 /*
717  * Handle the effects of a single heap change, appropriate to the current state
718  * of the snapshot builder and returns whether changes made at (xid, lsn) can
719  * be decoded.
720  */
721 bool
723 {
724  /*
725  * We can't handle data in transactions if we haven't built a snapshot
726  * yet, so don't store them.
727  */
728  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
729  return false;
730 
731  /*
732  * No point in keeping track of changes in transactions that we don't have
733  * enough information about to decode. This means that they started before
734  * we got into the SNAPBUILD_FULL_SNAPSHOT state.
735  */
736  if (builder->state < SNAPBUILD_CONSISTENT &&
737  TransactionIdPrecedes(xid, builder->next_phase_at))
738  return false;
739 
740  /*
741  * If the reorderbuffer doesn't yet have a snapshot, add one now, it will
742  * be needed to decode the change we're currently processing.
743  */
744  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
745  {
746  /* only build a new snapshot if we don't have a prebuilt one */
747  if (builder->snapshot == NULL)
748  {
749  builder->snapshot = SnapBuildBuildSnapshot(builder);
750  /* increase refcount for the snapshot builder */
752  }
753 
754  /*
755  * Increase refcount for the transaction we're handing the snapshot
756  * out to.
757  */
759  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
760  builder->snapshot);
761  }
762 
763  return true;
764 }
765 
766 /*
767  * Do CommandId/combo CID handling after reading an xl_heap_new_cid record.
768  * This implies that a transaction has done some form of write to system
769  * catalogs.
770  */
771 void
773  XLogRecPtr lsn, xl_heap_new_cid *xlrec)
774 {
775  CommandId cid;
776 
777  /*
778  * we only log new_cid's if a catalog tuple was modified, so mark the
779  * transaction as containing catalog modifications
780  */
781  ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
782 
783  ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
784  xlrec->target_node, xlrec->target_tid,
785  xlrec->cmin, xlrec->cmax,
786  xlrec->combocid);
787 
788  /* figure out new command id */
789  if (xlrec->cmin != InvalidCommandId &&
790  xlrec->cmax != InvalidCommandId)
791  cid = Max(xlrec->cmin, xlrec->cmax);
792  else if (xlrec->cmax != InvalidCommandId)
793  cid = xlrec->cmax;
794  else if (xlrec->cmin != InvalidCommandId)
795  cid = xlrec->cmin;
796  else
797  {
798  cid = InvalidCommandId; /* silence compiler */
799  elog(ERROR, "xl_heap_new_cid record without a valid CommandId");
800  }
801 
802  ReorderBufferAddNewCommandId(builder->reorder, xid, lsn, cid + 1);
803 }
804 
805 /*
806  * Add a new Snapshot to all transactions we're decoding that currently are
807  * in-progress so they can see new catalog contents made by the transaction
808  * that just committed. This is necessary because those in-progress
809  * transactions will use the new catalog's contents from here on (at the very
810  * least everything they do needs to be compatible with newer catalog
811  * contents).
812  */
813 static void
815 {
816  dlist_iter txn_i;
817  ReorderBufferTXN *txn;
818 
819  /*
820  * Iterate through all toplevel transactions. This can include
821  * subtransactions which we just don't yet know to be that, but that's
822  * fine, they will just get an unnecessary snapshot queued.
823  */
824  dlist_foreach(txn_i, &builder->reorder->toplevel_by_lsn)
825  {
826  txn = dlist_container(ReorderBufferTXN, node, txn_i.cur);
827 
829 
830  /*
831  * If we don't have a base snapshot yet, there are no changes in this
832  * transaction which in turn implies we don't yet need a snapshot at
833  * all. We'll add a snapshot when the first change gets queued.
834  *
835  * NB: This works correctly even for subtransactions because
836  * ReorderBufferAssignChild() takes care to transfer the base snapshot
837  * to the top-level transaction, and while iterating the changequeue
838  * we'll get the change from the subtxn.
839  */
840  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid))
841  continue;
842 
843  /*
844  * We don't need to add snapshot to prepared transactions as they
845  * should not see the new catalog contents.
846  */
847  if (rbtxn_prepared(txn) || rbtxn_skip_prepared(txn))
848  continue;
849 
850  elog(DEBUG2, "adding a new snapshot to %u at %X/%X",
851  txn->xid, LSN_FORMAT_ARGS(lsn));
852 
853  /*
854  * increase the snapshot's refcount for the transaction we are handing
855  * it out to
856  */
858  ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn,
859  builder->snapshot);
860  }
861 }
862 
863 /*
864  * Keep track of a new catalog changing transaction that has committed.
865  */
866 static void
868 {
870 
871  if (builder->committed.xcnt == builder->committed.xcnt_space)
872  {
873  builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1;
874 
875  elog(DEBUG1, "increasing space for committed transactions to %u",
876  (uint32) builder->committed.xcnt_space);
877 
878  builder->committed.xip = repalloc(builder->committed.xip,
879  builder->committed.xcnt_space * sizeof(TransactionId));
880  }
881 
882  /*
883  * TODO: It might make sense to keep the array sorted here instead of
884  * doing it every time we build a new snapshot. On the other hand this
885  * gets called repeatedly when a transaction with subtransactions commits.
886  */
887  builder->committed.xip[builder->committed.xcnt++] = xid;
888 }
889 
890 /*
891  * Remove knowledge about transactions we treat as committed that are smaller
892  * than ->xmin. Those won't ever get checked via the ->committed array but via
893  * the clog machinery, so we don't need to waste memory on them.
894  */
895 static void
897 {
898  int off;
899  TransactionId *workspace;
900  int surviving_xids = 0;
901 
902  /* not ready yet */
903  if (!TransactionIdIsNormal(builder->xmin))
904  return;
905 
906  /* TODO: Neater algorithm than just copying and iterating? */
907  workspace =
908  MemoryContextAlloc(builder->context,
909  builder->committed.xcnt * sizeof(TransactionId));
910 
911  /* copy xids that still are interesting to workspace */
912  for (off = 0; off < builder->committed.xcnt; off++)
913  {
914  if (NormalTransactionIdPrecedes(builder->committed.xip[off],
915  builder->xmin))
916  ; /* remove */
917  else
918  workspace[surviving_xids++] = builder->committed.xip[off];
919  }
920 
921  /* copy workspace back to persistent state */
922  memcpy(builder->committed.xip, workspace,
923  surviving_xids * sizeof(TransactionId));
924 
925  elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u",
926  (uint32) builder->committed.xcnt, (uint32) surviving_xids,
927  builder->xmin, builder->xmax);
928  builder->committed.xcnt = surviving_xids;
929 
930  pfree(workspace);
931 }
932 
933 /*
934  * Handle everything that needs to be done when a transaction commits
935  */
936 void
938  int nsubxacts, TransactionId *subxacts)
939 {
940  int nxact;
941 
942  bool needs_snapshot = false;
943  bool needs_timetravel = false;
944  bool sub_needs_timetravel = false;
945 
946  TransactionId xmax = xid;
947 
948  /*
949  * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor
950  * will they be part of a snapshot. So we don't need to record anything.
951  */
952  if (builder->state == SNAPBUILD_START ||
953  (builder->state == SNAPBUILD_BUILDING_SNAPSHOT &&
954  TransactionIdPrecedes(xid, builder->next_phase_at)))
955  {
956  /* ensure that only commits after this are getting replayed */
957  if (builder->start_decoding_at <= lsn)
958  builder->start_decoding_at = lsn + 1;
959  return;
960  }
961 
962  if (builder->state < SNAPBUILD_CONSISTENT)
963  {
964  /* ensure that only commits after this are getting replayed */
965  if (builder->start_decoding_at <= lsn)
966  builder->start_decoding_at = lsn + 1;
967 
968  /*
969  * If building an exportable snapshot, force xid to be tracked, even
970  * if the transaction didn't modify the catalog.
971  */
972  if (builder->building_full_snapshot)
973  {
974  needs_timetravel = true;
975  }
976  }
977 
978  for (nxact = 0; nxact < nsubxacts; nxact++)
979  {
980  TransactionId subxid = subxacts[nxact];
981 
982  /*
983  * Add subtransaction to base snapshot if catalog modifying, we don't
984  * distinguish to toplevel transactions there.
985  */
986  if (ReorderBufferXidHasCatalogChanges(builder->reorder, subxid))
987  {
988  sub_needs_timetravel = true;
989  needs_snapshot = true;
990 
991  elog(DEBUG1, "found subtransaction %u:%u with catalog changes",
992  xid, subxid);
993 
994  SnapBuildAddCommittedTxn(builder, subxid);
995 
996  if (NormalTransactionIdFollows(subxid, xmax))
997  xmax = subxid;
998  }
999 
1000  /*
1001  * If we're forcing timetravel we also need visibility information
1002  * about subtransaction, so keep track of subtransaction's state, even
1003  * if not catalog modifying. Don't need to distribute a snapshot in
1004  * that case.
1005  */
1006  else if (needs_timetravel)
1007  {
1008  SnapBuildAddCommittedTxn(builder, subxid);
1009  if (NormalTransactionIdFollows(subxid, xmax))
1010  xmax = subxid;
1011  }
1012  }
1013 
1014  /* if top-level modified catalog, it'll need a snapshot */
1015  if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
1016  {
1017  elog(DEBUG2, "found top level transaction %u, with catalog changes",
1018  xid);
1019  needs_snapshot = true;
1020  needs_timetravel = true;
1021  SnapBuildAddCommittedTxn(builder, xid);
1022  }
1023  else if (sub_needs_timetravel)
1024  {
1025  /* track toplevel txn as well, subxact alone isn't meaningful */
1026  SnapBuildAddCommittedTxn(builder, xid);
1027  }
1028  else if (needs_timetravel)
1029  {
1030  elog(DEBUG2, "forced transaction %u to do timetravel", xid);
1031 
1032  SnapBuildAddCommittedTxn(builder, xid);
1033  }
1034 
1035  if (!needs_timetravel)
1036  {
1037  /* record that we cannot export a general snapshot anymore */
1038  builder->committed.includes_all_transactions = false;
1039  }
1040 
1041  Assert(!needs_snapshot || needs_timetravel);
1042 
1043  /*
1044  * Adjust xmax of the snapshot builder, we only do that for committed,
1045  * catalog modifying, transactions, everything else isn't interesting for
1046  * us since we'll never look at the respective rows.
1047  */
1048  if (needs_timetravel &&
1049  (!TransactionIdIsValid(builder->xmax) ||
1050  TransactionIdFollowsOrEquals(xmax, builder->xmax)))
1051  {
1052  builder->xmax = xmax;
1053  TransactionIdAdvance(builder->xmax);
1054  }
1055 
1056  /* if there's any reason to build a historic snapshot, do so now */
1057  if (needs_snapshot)
1058  {
1059  /*
1060  * If we haven't built a complete snapshot yet there's no need to hand
1061  * it out, it wouldn't (and couldn't) be used anyway.
1062  */
1063  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
1064  return;
1065 
1066  /*
1067  * Decrease the snapshot builder's refcount of the old snapshot, note
1068  * that it still will be used if it has been handed out to the
1069  * reorderbuffer earlier.
1070  */
1071  if (builder->snapshot)
1073 
1074  builder->snapshot = SnapBuildBuildSnapshot(builder);
1075 
1076  /* we might need to execute invalidations, add snapshot */
1077  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
1078  {
1080  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
1081  builder->snapshot);
1082  }
1083 
1084  /* refcount of the snapshot builder for the new snapshot */
1086 
1087  /* add a new catalog snapshot to all currently running transactions */
1089  }
1090 }
1091 
1092 
1093 /* -----------------------------------
1094  * Snapshot building functions dealing with xlog records
1095  * -----------------------------------
1096  */
1097 
1098 /*
1099  * Process a running xacts record, and use its information to first build a
1100  * historic snapshot and later to release resources that aren't needed
1101  * anymore.
      *
      * In order: move towards (or serialize) a consistent snapshot, set
      * builder->xmin from the record, purge committed xids that are no longer
      * needed, advance the slot's xmin and, once consistent, tell the slot where
      * decoding can restart from.
      *
      * NOTE(review): the declarator line is not part of this listing; the body
      * uses 'builder', 'lsn' and 'running' (the running-xacts record).
1102  */
1103 void
1105 {
1106  ReorderBufferTXN *txn;
1107  TransactionId xmin;
1108 
1109  /*
1110  * If we're not consistent yet, inspect the record to see whether it
1111  * allows us to get closer to being consistent. If we are consistent, dump
1112  * our snapshot so others or we, after a restart, can use it.
1113  */
1114  if (builder->state < SNAPBUILD_CONSISTENT)
1115  {
1116  /* returns false if there's no point in performing cleanup just yet */
1117  if (!SnapBuildFindSnapshot(builder, lsn, running))
1118  return;
1119  }
1120  else
1121  SnapBuildSerialize(builder, lsn);
1122 
1123  /*
1124  * Update range of interesting xids based on the running xacts
1125  * information. We don't increase ->xmax using it, because once we are in
1126  * a consistent state we can do that ourselves and much more efficiently
1127  * so, because we only need to do it for catalog transactions since we
1128  * only ever look at those.
1129  *
1130  * NB: We only increase xmax when a catalog modifying transaction commits
1131  * (see SnapBuildCommitTxn). Because of this, xmax can be lower than
1132  * xmin, which looks odd but is correct and actually more efficient, since
1133  * we hit fast paths in heapam_visibility.c.
1134  */
1135  builder->xmin = running->oldestRunningXid;
1136 
1137  /* Remove transactions we don't need to keep track of anymore */
1138  SnapBuildPurgeCommittedTxn(builder);
1139 
1140  /*
1141  * Advance the xmin limit for the current replication slot, to allow
1142  * vacuum to clean up the tuples this slot has been protecting.
1143  *
1144  * The reorderbuffer might have an xmin among the currently running
1145  * snapshots; use it if so. If not, we need only consider the snapshots
1146  * we'll produce later, which can't be less than the oldest running xid in
1147  * the record we're reading now.
1148  */
1149  xmin = ReorderBufferGetOldestXmin(builder->reorder);
1150  if (xmin == InvalidTransactionId)
1151  xmin = running->oldestRunningXid;
1152  elog(DEBUG3, "xmin: %u, xmax: %u, oldest running: %u, oldest xmin: %u",
1153  builder->xmin, builder->xmax, running->oldestRunningXid, xmin);
1154  LogicalIncreaseXminForSlot(lsn, xmin);
1155 
1156  /*
1157  * Also tell the slot where we can restart decoding from. We don't want to
1158  * do that after every commit because changing that implies an fsync of
1159  * the logical slot's state file, so we only do it every time we see a
1160  * running xacts record.
1161  *
1162  * Do so by looking for the oldest in progress transaction (determined by
1163  * the first LSN of any of its relevant records). Every transaction
1164  * remembers the last location we stored the snapshot to disk before its
1165  * beginning. That point is where we can restart from.
1166  */
1167 
1168  /*
1169  * Can't know about a serialized snapshot's location if we're not
1170  * consistent.
1171  */
1172  if (builder->state < SNAPBUILD_CONSISTENT)
1173  return;
1174 
1175  txn = ReorderBufferGetOldestTXN(builder->reorder);
1176 
1177  /*
1178  * The oldest ongoing txn might have started when we didn't yet serialize
1179  * anything because we hadn't reached a consistent state yet.
1180  */
1181  if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr)
1183 
1184  /*
1185  * No in-progress transaction, can reuse the last serialized snapshot if
1186  * we have one.
1187  */
1188  else if (txn == NULL &&
1192  builder->last_serialized_snapshot);
1193 }
1194 
1195 
1196 /*
1197  * Build the start of a snapshot that's capable of decoding the catalog.
1198  *
1199  * Helper function for SnapBuildProcessRunningXacts() while we're not yet
1200  * consistent.
1201  *
1202  * Returns true if there is a point in performing internal maintenance/cleanup
1203  * using the xl_running_xacts record.
      *
      * Only paths a) and b) below return false; every other path, including the
      * "xmin horizon too low" skip, returns true.
1204  */
1205 static bool
1207 {
1208  /* ---
1209  * Build catalog decoding snapshot incrementally using information about
1210  * the currently running transactions. There are several ways to do that:
1211  *
1212  * a) There were no running transactions when the xl_running_xacts record
1213  * was inserted, jump to CONSISTENT immediately. We might find such a
1214  * state while waiting on c)'s sub-states.
1215  *
1216  * b) This (in a previous run) or another decoding slot serialized a
1217  * snapshot to disk that we can use. Can't use this method for the
1218  * initial snapshot when slot is being created and needs full snapshot
1219  * for export or direct use, as that snapshot will only contain catalog
1220  * modifying transactions.
1221  *
1222  * c) First incrementally build a snapshot for catalog tuples
1223  * (BUILDING_SNAPSHOT), that requires all, already in-progress,
1224  * transactions to finish. Every transaction starting after that
1225  * (FULL_SNAPSHOT state), has enough information to be decoded. But
1226  * for older running transactions no viable snapshot exists yet, so
1227  * CONSISTENT will only be reached once all of those have finished.
1228  * ---
1229  */
1230 
1231  /*
1232  * xl_running_xact record is older than what we can use, we might not have
1233  * all necessary catalog rows anymore.
1234  */
1237  builder->initial_xmin_horizon))
1238  {
1239  ereport(DEBUG1,
1240  (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
1241  LSN_FORMAT_ARGS(lsn)),
1242  errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
1243  builder->initial_xmin_horizon, running->oldestRunningXid)));
1244 
1245 
1246  SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon);
1247 
1248  return true;
1249  }
1250 
1251  /*
1252  * a) No transactions were running, we can jump to consistent.
1253  *
1254  * This is not affected by races around xl_running_xacts, because we can
1255  * miss transaction commits, but currently not transactions starting.
1256  *
1257  * NB: We might have already started to incrementally assemble a snapshot,
1258  * so we need to be careful to deal with that.
1259  */
1260  if (running->oldestRunningXid == running->nextXid)
1261  {
1262  if (builder->start_decoding_at == InvalidXLogRecPtr ||
1263  builder->start_decoding_at <= lsn)
1264  /* can decode everything after this */
1265  builder->start_decoding_at = lsn + 1;
1266 
1267  /* As no transactions were running xmin/xmax can be trivially set. */
1268  builder->xmin = running->nextXid; /* < are finished */
1269  builder->xmax = running->nextXid; /* >= are running */
1270 
1271  /* so we can safely use the faster comparisons */
1272  Assert(TransactionIdIsNormal(builder->xmin));
1273  Assert(TransactionIdIsNormal(builder->xmax));
1274 
1275  builder->state = SNAPBUILD_CONSISTENT;
1277 
1278  ereport(LOG,
1279  (errmsg("logical decoding found consistent point at %X/%X",
1280  LSN_FORMAT_ARGS(lsn)),
1281  errdetail("There are no running transactions.")));
1282 
1283  return false;
1284  }
1285  /* b) valid on disk state and not building full snapshot */
1286  else if (!builder->building_full_snapshot &&
1287  SnapBuildRestore(builder, lsn))
1288  {
1289  /* there won't be any state to cleanup */
1290  return false;
1291  }
1292 
1293  /*
1294  * c) transition from START to BUILDING_SNAPSHOT.
1295  *
1296  * In START state, and a xl_running_xacts record with running xacts is
1297  * encountered. In that case, switch to BUILDING_SNAPSHOT state, and
1298  * record xl_running_xacts->nextXid. Once all running xacts have finished
1299  * (i.e. they're all >= nextXid), we have a complete catalog snapshot. It
1300  * might look that we could use xl_running_xact's ->xids information to
1301  * get there quicker, but that is problematic because transactions marked
1302  * as running, might already have inserted their commit record - it's
1303  * infeasible to change that with locking.
1304  */
1305  else if (builder->state == SNAPBUILD_START)
1306  {
1308  builder->next_phase_at = running->nextXid;
1309 
1310  /*
1311  * Start with an xmin/xmax that's correct for the future, when all the
1312  * currently running transactions have finished. We'll update both
1313  * while waiting for the pending transactions to finish.
1314  */
1315  builder->xmin = running->nextXid; /* < are finished */
1316  builder->xmax = running->nextXid; /* >= are running */
1317 
1318  /* so we can safely use the faster comparisons */
1319  Assert(TransactionIdIsNormal(builder->xmin));
1320  Assert(TransactionIdIsNormal(builder->xmax));
1321 
1322  ereport(LOG,
1323  (errmsg("logical decoding found initial starting point at %X/%X",
1324  LSN_FORMAT_ARGS(lsn)),
1325  errdetail("Waiting for transactions (approximately %d) older than %u to end.",
1326  running->xcnt, running->nextXid)));
1327 
1328  SnapBuildWaitSnapshot(running, running->nextXid);
1329  }
1330 
1331  /*
1332  * c) transition from BUILDING_SNAPSHOT to FULL_SNAPSHOT.
1333  *
1334  * In BUILDING_SNAPSHOT state, and this xl_running_xacts' oldestRunningXid
1335  * is >= the nextXid from when we switched to BUILDING_SNAPSHOT. This
1336  * means all transactions starting afterwards have enough information to
1337  * be decoded. Switch to FULL_SNAPSHOT.
1338  */
1339  else if (builder->state == SNAPBUILD_BUILDING_SNAPSHOT &&
1341  running->oldestRunningXid))
1342  {
1343  builder->state = SNAPBUILD_FULL_SNAPSHOT;
1344  builder->next_phase_at = running->nextXid;
1345 
1346  ereport(LOG,
1347  (errmsg("logical decoding found initial consistent point at %X/%X",
1348  LSN_FORMAT_ARGS(lsn)),
1349  errdetail("Waiting for transactions (approximately %d) older than %u to end.",
1350  running->xcnt, running->nextXid)));
1351 
1352  SnapBuildWaitSnapshot(running, running->nextXid);
1353  }
1354 
1355  /*
1356  * c) transition from FULL_SNAPSHOT to CONSISTENT.
1357  *
1358  * In FULL_SNAPSHOT state, and this xl_running_xacts'
1359  * oldestRunningXid is >= the nextXid from when we switched to
1360  * FULL_SNAPSHOT. This means all transactions that are currently in
1361  * progress have a catalog snapshot, and all their changes have been
1362  * collected. Switch to CONSISTENT.
1363  */
1364  else if (builder->state == SNAPBUILD_FULL_SNAPSHOT &&
1366  running->oldestRunningXid))
1367  {
1368  builder->state = SNAPBUILD_CONSISTENT;
1370 
1371  ereport(LOG,
1372  (errmsg("logical decoding found consistent point at %X/%X",
1373  LSN_FORMAT_ARGS(lsn)),
1374  errdetail("There are no old transactions anymore.")));
1375  }
1376 
1377  /*
1378  * We already started to track running xacts and need to wait for all
1379  * in-progress ones to finish. We fall through to the normal processing of
1380  * records so incremental cleanup can be performed.
1381  */
1382  return true;
1383 }
1384 
1385 /* ---
1386  * Iterate through xids in record, wait for all older than the cutoff to
1387  * finish. Then, if possible, log a new xl_running_xacts record.
1388  *
1389  * This isn't required for the correctness of decoding, but to:
1390  * a) allow isolationtester to notice that we're currently waiting for
1391  * something.
1392  * b) log a new xl_running_xacts record where it'd be helpful, without having
1393  * to wait for bgwriter or checkpointer.
      *
      * 'running' supplies the xid array (xids[0 .. xcnt-1]); 'cutoff' is the xid
      * limit: entries for which TransactionIdFollows(xid, cutoff) holds are
      * skipped, all others are waited for.
1394  * ---
1395  */
1396 static void
1398 {
1399  int off;
1400 
1401  for (off = 0; off < running->xcnt; off++)
1402  {
1403  TransactionId xid = running->xids[off];
1404 
1405  /*
1406  * Upper layers should prevent that we ever need to wait on ourselves.
1407  * Check anyway, since failing to do so would either result in an
1408  * endless wait or an Assert() failure.
1409  */
1411  elog(ERROR, "waiting for ourselves");
1412 
      /* transactions following the cutoff don't need to be waited for */
1413  if (TransactionIdFollows(xid, cutoff))
1414  continue;
1415 
      /* wait for this transaction to finish */
1416  XactLockTableWait(xid, NULL, NULL, XLTW_None);
1417  }
1418 
1419  /*
1420  * All transactions we needed to finish finished - try to ensure there is
1421  * another xl_running_xacts record in a timely manner, without having to
1422  * wait for bgwriter or checkpointer to log one. During recovery we can't
1423  * enforce that, so we'll have to wait.
1424  */
1425  if (!RecoveryInProgress())
1426  {
1428  }
1429 }
1430 
1431 /* -----------------------------------
1432  * Snapshot serialization support
1433  * -----------------------------------
1434  */
1435 
1436 /*
1437  * We store the current state of struct SnapBuild on disk in the following
      * manner:
1438  *
1439  * struct SnapBuildOnDisk;
1440  * TransactionId * committed.xcnt; (*not xcnt_space*)
1441  *
      * NOTE(review): the member declarations are not part of this listing; code
      * elsewhere in the file accesses the members magic, checksum, version,
      * length and builder.
1442  */
1443 typedef struct SnapBuildOnDisk
1444 {
1445  /* first part of this struct needs to be version independent */
1446 
1447  /* data not covered by checksum */
1450 
1451  /* data covered by checksum */
1452 
1453  /* version, in case we want to support pg_upgrade */
1455  /* how large is the on disk data, excluding the constant sized part */
1457 
1458  /* version dependent part */
1460 
1461  /* variable amount of TransactionIds follows */
1463 
     /* size of everything stored in front of the 'builder' member */
1464 #define SnapBuildOnDiskConstantSize \
1465  offsetof(SnapBuildOnDisk, builder)
     /* size of the leading part, in front of 'version', not fed into the CRC */
1466 #define SnapBuildOnDiskNotChecksummedSize \
1467  offsetof(SnapBuildOnDisk, version)
1468 
     /* values written to, and required in, the magic and version members */
1469 #define SNAPBUILD_MAGIC 0x51A1E001
1470 #define SNAPBUILD_VERSION 4
1471 
1472 /*
1473  * Store/Load a snapshot from disk, depending on the snapshot builder's state.
1474  *
1475  * Supposed to be used by external (i.e. not snapbuild.c) code that just read
1476  * a record that's a potential location for a serialized snapshot.
      *
      * While the builder has not reached SNAPBUILD_CONSISTENT we try to restore
      * a snapshot stored for 'lsn' (the result of that attempt is ignored here);
      * once consistent, we serialize our own snapshot at 'lsn' instead.
1477  */
1478 void
1480 {
1481  if (builder->state < SNAPBUILD_CONSISTENT)
1482  SnapBuildRestore(builder, lsn);
1483  else
1484  SnapBuildSerialize(builder, lsn);
1485 }
1486 
1487 /*
1488  * Serialize the snapshot 'builder' at the location 'lsn' if it hasn't already
1489  * been done by another decoding process.
      *
      * The state is written to a pid-specific temporary file below
      * pg_logical/snapshots, fsynced, and then renamed to the final
      * <lsn>.snap name. On success (or if the file already existed)
      * builder->last_serialized_snapshot is set to 'lsn'. Does nothing unless
      * the builder is in SNAPBUILD_CONSISTENT state; file access failures raise
      * ERROR.
1490  */
1491 static void
1493 {
1494  Size needed_length;
1495  SnapBuildOnDisk *ondisk = NULL;
1496  char *ondisk_c;
1497  int fd;
1498  char tmppath[MAXPGPATH];
1499  char path[MAXPGPATH];
1500  int ret;
1501  struct stat stat_buf;
1502  Size sz;
1503 
1504  Assert(lsn != InvalidXLogRecPtr);
1506  builder->last_serialized_snapshot <= lsn);
1507 
1508  /*
1509  * no point in serializing if we cannot continue to work immediately after
1510  * restoring the snapshot
1511  */
1512  if (builder->state < SNAPBUILD_CONSISTENT)
1513  return;
1514 
1515  /* consistent snapshots have no next phase */
1517 
1518  /*
1519  * We identify snapshots by the LSN they are valid for. We don't need to
1520  * include timelines in the name as each LSN maps to exactly one timeline
1521  * unless the user used pg_resetwal or similar. If a user did so, there's
1522  * no hope continuing to decode anyway.
1523  */
1524  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1525  LSN_FORMAT_ARGS(lsn));
1526 
1527  /*
1528  * first check whether some other backend already has written the snapshot
1529  * for this LSN. It's perfectly fine if there's none, so we accept ENOENT
1530  * as a valid state. Everything else is an unexpected error.
1531  */
1532  ret = stat(path, &stat_buf);
1533 
1534  if (ret != 0 && errno != ENOENT)
1535  ereport(ERROR,
1537  errmsg("could not stat file \"%s\": %m", path)));
1538 
1539  else if (ret == 0)
1540  {
1541  /*
1542  * somebody else has already serialized to this point, don't overwrite
1543  * but remember location, so we don't need to read old data again.
1544  *
1545  * To be sure it has been synced to disk after the rename() from the
1546  * tempfile filename to the real filename, we just repeat the fsync.
1547  * That ought to be cheap because in most scenarios it should already
1548  * be safely on disk.
1549  */
1550  fsync_fname(path, false);
1551  fsync_fname("pg_logical/snapshots", true);
1552 
1553  builder->last_serialized_snapshot = lsn;
1554  goto out;
1555  }
1556 
1557  /*
1558  * there is an obvious race condition here between the time we stat(2) the
1559  * file and us writing the file. But we rename the file into place
1560  * atomically and all files created need to contain the same data anyway,
1561  * so this is perfectly fine, although a bit of a resource waste. Locking
1562  * seems like pointless complication.
1563  */
1564  elog(DEBUG1, "serializing snapshot to %s", path);
1565 
1566  /* to make sure only we will write to this tempfile, include pid */
1567  sprintf(tmppath, "pg_logical/snapshots/%X-%X.snap.%d.tmp",
1568  LSN_FORMAT_ARGS(lsn), MyProcPid);
1569 
1570  /*
1571  * Unlink the temporary file if it already exists; it must be left over
1572  * from an earlier crash/error, since we won't enter this function twice
1573  * from within a single decoding slot/backend and the temporary file's name
1574  * contains the pid of the current process.
1575  */
1576  if (unlink(tmppath) != 0 && errno != ENOENT)
1577  ereport(ERROR,
1579  errmsg("could not remove file \"%s\": %m", tmppath)));
1580 
     /* one buffer holds the fixed struct followed by the committed xid array */
1581  needed_length = sizeof(SnapBuildOnDisk) +
1582  sizeof(TransactionId) * builder->committed.xcnt;
1583 
1584  ondisk_c = MemoryContextAllocZero(builder->context, needed_length);
1585  ondisk = (SnapBuildOnDisk *) ondisk_c;
1586  ondisk->magic = SNAPBUILD_MAGIC;
1587  ondisk->version = SNAPBUILD_VERSION;
     /*
      * NOTE(review): length is set to the full buffer size, i.e. including
      * sizeof(SnapBuildOnDisk), while the comment on the struct member says the
      * constant sized part is excluded - confirm which one is intended.
      */
1588  ondisk->length = needed_length;
1589  INIT_CRC32C(ondisk->checksum);
1590  COMP_CRC32C(ondisk->checksum,
1591  ((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
1593  ondisk_c += sizeof(SnapBuildOnDisk);
1594 
1595  memcpy(&ondisk->builder, builder, sizeof(SnapBuild));
1596  /* NULL-ify memory-only data */
1597  ondisk->builder.context = NULL;
1598  ondisk->builder.snapshot = NULL;
1599  ondisk->builder.reorder = NULL;
1600  ondisk->builder.committed.xip = NULL;
1601 
1602  COMP_CRC32C(ondisk->checksum,
1603  &ondisk->builder,
1604  sizeof(SnapBuild));
1605 
1606  /* copy committed xacts */
1607  sz = sizeof(TransactionId) * builder->committed.xcnt;
1608  memcpy(ondisk_c, builder->committed.xip, sz);
1609  COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
1610  ondisk_c += sz;
1611 
1612  FIN_CRC32C(ondisk->checksum);
1613 
1614  /* we have valid data now, open tempfile and write it there */
1615  fd = OpenTransientFile(tmppath,
1616  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
1617  if (fd < 0)
1618  ereport(ERROR,
1620  errmsg("could not open file \"%s\": %m", tmppath)));
1621 
     /* cleared so a short write that leaves errno untouched can be detected */
1622  errno = 0;
1624  if ((write(fd, ondisk, needed_length)) != needed_length)
1625  {
1626  int save_errno = errno;
1627 
1629 
1630  /* if write didn't set errno, assume problem is no disk space */
1631  errno = save_errno ? save_errno : ENOSPC;
1632  ereport(ERROR,
1634  errmsg("could not write to file \"%s\": %m", tmppath)));
1635  }
1637 
1638  /*
1639  * fsync the file before renaming so that even if we crash after this we
1640  * have either a fully valid file or nothing.
1641  *
1642  * It's safe to just ERROR on fsync() here because we'll retry the whole
1643  * operation including the writes.
1644  *
1645  * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has
1646  * some noticeable overhead since it's performed synchronously during
1647  * decoding?
1648  */
1650  if (pg_fsync(fd) != 0)
1651  {
1652  int save_errno = errno;
1653 
1655  errno = save_errno;
1656  ereport(ERROR,
1658  errmsg("could not fsync file \"%s\": %m", tmppath)));
1659  }
1661 
1662  if (CloseTransientFile(fd) != 0)
1663  ereport(ERROR,
1665  errmsg("could not close file \"%s\": %m", tmppath)));
1666 
1667  fsync_fname("pg_logical/snapshots", true);
1668 
1669  /*
1670  * We may overwrite the work from some other backend, but that's ok, our
1671  * snapshot is valid as well, we'll just have done some superfluous work.
1672  */
1673  if (rename(tmppath, path) != 0)
1674  {
1675  ereport(ERROR,
1677  errmsg("could not rename file \"%s\" to \"%s\": %m",
1678  tmppath, path)));
1679  }
1680 
1681  /* make sure we persist */
1682  fsync_fname(path, false);
1683  fsync_fname("pg_logical/snapshots", true);
1684 
1685  /*
1686  * Now there's no way we can lose the dumped state anymore, remember this
1687  * as a serialization point.
1688  */
1689  builder->last_serialized_snapshot = lsn;
1690 
1691 out:
1693  builder->last_serialized_snapshot);
1694  /* be tidy; ondisk is still NULL if the file already existed */
1695  if (ondisk)
1696  pfree(ondisk);
1697 }
1698 
1699 /*
1700  * Restore a snapshot into 'builder' if previously one has been stored at the
1701  * location indicated by 'lsn'. Returns true if successful, false otherwise.
      *
      * false is returned when the builder is already consistent, when no file
      * exists for 'lsn', or when the stored snapshot is judged not interesting
      * (see the checks before the snapshot_not_interesting label). Read
      * failures and a wrong magic number, version or checksum raise ERROR.
1702  */
1703 static bool
1705 {
1706  SnapBuildOnDisk ondisk;
1707  int fd;
1708  char path[MAXPGPATH];
1709  Size sz;
1710  int readBytes;
1711  pg_crc32c checksum;
1712 
1713  /* no point in loading a snapshot if we're already there */
1714  if (builder->state == SNAPBUILD_CONSISTENT)
1715  return false;
1716 
1717  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1718  LSN_FORMAT_ARGS(lsn));
1719 
1720  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
1721 
     /* a missing file just means nothing was serialized at this LSN */
1722  if (fd < 0 && errno == ENOENT)
1723  return false;
1724  else if (fd < 0)
1725  ereport(ERROR,
1727  errmsg("could not open file \"%s\": %m", path)));
1728 
1729  /* ----
1730  * Make sure the snapshot had been stored safely to disk, that's normally
1731  * cheap.
1732  * Note that we do not need PANIC here, nobody will be able to use the
1733  * slot without fsyncing, and saving it won't succeed without an fsync()
1734  * either...
1735  * ----
1736  */
1737  fsync_fname(path, false);
1738  fsync_fname("pg_logical/snapshots", true);
1739 
1740 
1741  /* read statically sized portion of snapshot */
1743  readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
1745  if (readBytes != SnapBuildOnDiskConstantSize)
1746  {
1747  int save_errno = errno;
1748 
1750 
1751  if (readBytes < 0)
1752  {
1753  errno = save_errno;
1754  ereport(ERROR,
1756  errmsg("could not read file \"%s\": %m", path)));
1757  }
1758  else
1759  ereport(ERROR,
1761  errmsg("could not read file \"%s\": read %d of %zu",
1762  path, readBytes,
1764  }
1765 
1766  if (ondisk.magic != SNAPBUILD_MAGIC)
1767  ereport(ERROR,
1769  errmsg("snapbuild state file \"%s\" has wrong magic number: %u instead of %u",
1770  path, ondisk.magic, SNAPBUILD_MAGIC)));
1771 
1772  if (ondisk.version != SNAPBUILD_VERSION)
1773  ereport(ERROR,
1775  errmsg("snapbuild state file \"%s\" has unsupported version: %u instead of %u",
1776  path, ondisk.version, SNAPBUILD_VERSION)));
1777 
     /* recompute the checksum, starting behind the not-checksummed prefix */
1778  INIT_CRC32C(checksum);
1779  COMP_CRC32C(checksum,
1780  ((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize,
1782 
1783  /* read SnapBuild */
1785  readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
1787  if (readBytes != sizeof(SnapBuild))
1788  {
1789  int save_errno = errno;
1790 
1792 
1793  if (readBytes < 0)
1794  {
1795  errno = save_errno;
1796  ereport(ERROR,
1798  errmsg("could not read file \"%s\": %m", path)));
1799  }
1800  else
1801  ereport(ERROR,
1803  errmsg("could not read file \"%s\": read %d of %zu",
1804  path, readBytes, sizeof(SnapBuild))));
1805  }
1806  COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
1807 
1808  /* restore committed xacts information */
1809  sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
1810  ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
1812  readBytes = read(fd, ondisk.builder.committed.xip, sz);
1814  if (readBytes != sz)
1815  {
1816  int save_errno = errno;
1817 
1819 
1820  if (readBytes < 0)
1821  {
1822  errno = save_errno;
1823  ereport(ERROR,
1825  errmsg("could not read file \"%s\": %m", path)));
1826  }
1827  else
1828  ereport(ERROR,
1830  errmsg("could not read file \"%s\": read %d of %zu",
1831  path, readBytes, sz)));
1832  }
1833  COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
1834 
1835  if (CloseTransientFile(fd) != 0)
1836  ereport(ERROR,
1838  errmsg("could not close file \"%s\": %m", path)));
1839 
1840  FIN_CRC32C(checksum);
1841 
1842  /* verify checksum of what we've read */
1843  if (!EQ_CRC32C(checksum, ondisk.checksum))
1844  ereport(ERROR,
1846  errmsg("checksum mismatch for snapbuild state file \"%s\": is %u, should be %u",
1847  path, checksum, ondisk.checksum)));
1848 
1849  /*
1850  * ok, we now have a sensible snapshot here, figure out if it has more
1851  * information than we have.
1852  */
1853 
1854  /*
1855  * We are only interested in consistent snapshots for now, comparing
1856  * whether one incomplete snapshot is more "advanced" seems to be
1857  * unnecessarily complex.
1858  */
1859  if (ondisk.builder.state < SNAPBUILD_CONSISTENT)
1860  goto snapshot_not_interesting;
1861 
1862  /*
1863  * Don't use a snapshot that requires an xmin that we cannot guarantee to
1864  * be available.
1865  */
1867  goto snapshot_not_interesting;
1868 
1869  /* consistent snapshots have no next phase */
1871 
1872  /* ok, we think the snapshot is sensible, copy over everything important */
1873  builder->xmin = ondisk.builder.xmin;
1874  builder->xmax = ondisk.builder.xmax;
1875  builder->state = ondisk.builder.state;
1876 
1877  builder->committed.xcnt = ondisk.builder.committed.xcnt;
1878  /* We only allocated/stored xcnt, not xcnt_space xids ! */
1879  /* don't overwrite preallocated xip, if we don't have anything here */
1880  if (builder->committed.xcnt > 0)
1881  {
1882  pfree(builder->committed.xip);
1883  builder->committed.xcnt_space = ondisk.builder.committed.xcnt;
1884  builder->committed.xip = ondisk.builder.committed.xip;
1885  }
1886  ondisk.builder.committed.xip = NULL;
1887 
1888  /* our snapshot is not interesting anymore, build a new one */
1889  if (builder->snapshot != NULL)
1890  {
1892  }
1893  builder->snapshot = SnapBuildBuildSnapshot(builder);
1895 
1896  ReorderBufferSetRestartPoint(builder->reorder, lsn);
1897 
1898  Assert(builder->state == SNAPBUILD_CONSISTENT);
1899 
1900  ereport(LOG,
1901  (errmsg("logical decoding found consistent point at %X/%X",
1902  LSN_FORMAT_ARGS(lsn)),
1903  errdetail("Logical decoding will begin using saved snapshot.")));
1904  return true;
1905 
1906 snapshot_not_interesting:
     /* discard the xid array read from the file, it was not adopted */
1907  if (ondisk.builder.committed.xip != NULL)
1908  pfree(ondisk.builder.committed.xip);
1909  return false;
1910 }
1911 
1912 /*
1913  * Remove all serialized snapshots that are not required anymore because no
1914  * slot can need them. This doesn't actually have to run during a checkpoint,
1915  * but it's a convenient point to schedule this.
1916  *
1917  * NB: We run this during checkpoints even if logical decoding is disabled so
1918  * we clean up old snapshot files at some point after it got disabled.
      *
      * Failing to remove an individual file is only logged, never raised as an
      * error.
1919  */
1920 void
1922 {
1923  XLogRecPtr cutoff;
1924  XLogRecPtr redo;
1925  DIR *snap_dir;
1926  struct dirent *snap_de;
     /* presumably sized for d_name plus the "pg_logical/snapshots/" prefix */
1927  char path[MAXPGPATH + 21];
1928 
1929  /*
1930  * We start off with a minimum of the last redo pointer. No new
1931  * replication slot will start before that, so that's a safe upper bound
1932  * for removal.
1933  */
1934  redo = GetRedoRecPtr();
1935 
1936  /* now check for the restart ptrs from existing slots */
1938 
1939  /* don't start earlier than the restart lsn */
1940  if (redo < cutoff)
1941  cutoff = redo;
1942 
1943  snap_dir = AllocateDir("pg_logical/snapshots");
1944  while ((snap_de = ReadDir(snap_dir, "pg_logical/snapshots")) != NULL)
1945  {
1946  uint32 hi;
1947  uint32 lo;
1948  XLogRecPtr lsn;
1949  struct stat statbuf;
1950 
1951  if (strcmp(snap_de->d_name, ".") == 0 ||
1952  strcmp(snap_de->d_name, "..") == 0)
1953  continue;
1954 
1955  snprintf(path, sizeof(path), "pg_logical/snapshots/%s", snap_de->d_name);
1956 
      /* skip non-regular files; entries where lstat() fails are still examined */
1957  if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
1958  {
1959  elog(DEBUG1, "only regular files expected: %s", path);
1960  continue;
1961  }
1962 
1963  /*
1964  * temporary filenames from SnapBuildSerialize() include the LSN and
1965  * everything but are postfixed by .$pid.tmp. We can just remove them
1966  * the same as other files because there can be none that are
1967  * currently being written that are older than cutoff.
1968  *
1969  * We just log a message if a file doesn't fit the pattern, it's
1970  * probably some editor's lock/state file or similar...
1971  */
1972  if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2)
1973  {
1974  ereport(LOG,
1975  (errmsg("could not parse file name \"%s\"", path)));
1976  continue;
1977  }
1978 
      /* reassemble the 64 bit LSN from the two hex halves of the file name */
1979  lsn = ((uint64) hi) << 32 | lo;
1980 
1981  /* check whether we still need it; an invalid cutoff removes every file */
1982  if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
1983  {
1984  elog(DEBUG1, "removing snapbuild snapshot %s", path);
1985 
1986  /*
1987  * It's not particularly harmful, though strange, if we can't
1988  * remove the file here. Don't prevent the checkpoint from
1989  * completing, that'd be a cure worse than the disease.
1990  */
1991  if (unlink(path) < 0)
1992  {
1993  ereport(LOG,
1995  errmsg("could not remove file \"%s\": %m",
1996  path)));
1997  continue;
1998  }
1999  }
2000  }
2001  FreeDir(snap_dir);
2002 }
#define InvalidCommandId
Definition: c.h:604
unsigned int uint32
Definition: c.h:441
#define Max(x, y)
Definition: c.h:980
#define PG_BINARY
Definition: c.h:1268
#define FirstCommandId
Definition: c.h:603
uint32 CommandId
Definition: c.h:601
uint32 TransactionId
Definition: c.h:587
size_t Size
Definition: c.h:540
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1014
int errmsg_internal(const char *fmt,...)
Definition: elog.c:991
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1064
int errcode_for_file_access(void)
Definition: elog.c:716
int errdetail(const char *fmt,...)
Definition: elog.c:1037
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define LOG
Definition: elog.h:25
#define DEBUG3
Definition: elog.h:22
#define DEBUG2
Definition: elog.h:23
#define DEBUG1
Definition: elog.h:24
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2788
int FreeDir(DIR *dir)
Definition: fd.c:2840
int CloseTransientFile(int fd)
Definition: fd.c:2688
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:673
int pg_fsync(int fd)
Definition: fd.c:359
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2511
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2722
int MyProcPid
Definition: globals.c:44
#define dlist_foreach(iter, lhead)
Definition: ilist.h:526
#define dlist_container(type, membername, ptr)
Definition: ilist.h:496
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
Assert(fmt[strlen(fmt) - 1] !='\n')
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:668
@ XLTW_None
Definition: lmgr.h:26
void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
Definition: logical.c:1662
void LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
Definition: logical.c:1594
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1196
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1800
@ LW_SHARED
Definition: lwlock.h:105
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc0(Size size)
Definition: mcxt.c:1099
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:906
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
void * palloc(Size size)
Definition: mcxt.c:1068
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:197
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:43
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
#define ERRCODE_T_R_SERIALIZATION_FAILURE
Definition: pgbench.c:79
#define sprintf
Definition: port.h:227
#define snprintf
Definition: port.h:225
#define qsort(a, b, c, d)
Definition: port.h:495
static void test(void)
static int fd(const char *x, int i)
Definition: preproc-init.c:105
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2962
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:2070
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileNode node, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
#define rbtxn_prepared(txn)
#define rbtxn_skip_prepared(txn)
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:930
Snapshot SnapBuildGetOrBuildSnapshot(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:662
static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1492
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition: snapbuild.c:418
#define SNAPBUILD_VERSION
Definition: snapbuild.c:1470
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition: snapbuild.c:394
void SnapBuildResetExportedSnapshotState(void)
Definition: snapbuild.c:710
void SnapBuildSetTwoPhaseAt(SnapBuild *builder, XLogRecPtr ptr)
Definition: snapbuild.c:385
static void SnapBuildSnapIncRefcount(Snapshot snap)
Definition: snapbuild.c:406
bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
Definition: snapbuild.c:722
const char * SnapBuildExportSnapshot(SnapBuild *builder)
Definition: snapbuild.c:622
XLogRecPtr SnapBuildGetTwoPhaseAt(SnapBuild *builder)
Definition: snapbuild.c:376
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition: snapbuild.c:367
#define SnapBuildOnDiskNotChecksummedSize
Definition: snapbuild.c:1466
void FreeSnapshotBuilder(SnapBuild *builder)
Definition: snapbuild.c:323
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1921
static void SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:867
SnapBuild * AllocateSnapshotBuilder(ReorderBuffer *reorder, TransactionId xmin_horizon, XLogRecPtr start_lsn, bool need_full_snapshot, XLogRecPtr two_phase_at)
Definition: snapbuild.c:280
#define SNAPBUILD_MAGIC
Definition: snapbuild.c:1469
Snapshot SnapBuildInitialSnapshot(SnapBuild *builder)
Definition: snapbuild.c:530
static ResourceOwner SavedResourceOwnerDuringExport
Definition: snapbuild.c:250
void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1479
static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
Definition: snapbuild.c:1397
static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder)
Definition: snapbuild.c:450
void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn, xl_heap_new_cid *xlrec)
Definition: snapbuild.c:772
void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1104
static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:814
void SnapBuildClearExportedSnapshot(void)
Definition: snapbuild.c:683
static void SnapBuildFreeSnapshot(Snapshot snap)
Definition: snapbuild.c:342
static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1206
void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, int nsubxacts, TransactionId *subxacts)
Definition: snapbuild.c:937
static bool ExportInProgress
Definition: snapbuild.c:251
struct SnapBuildOnDisk SnapBuildOnDisk
#define SnapBuildOnDiskConstantSize
Definition: snapbuild.c:1464
static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1704
static void SnapBuildPurgeCommittedTxn(SnapBuild *builder)
Definition: snapbuild.c:896
SnapBuildState
Definition: snapbuild.h:19
@ SNAPBUILD_START
Definition: snapbuild.h:23
@ SNAPBUILD_BUILDING_SNAPSHOT
Definition: snapbuild.h:29
@ SNAPBUILD_FULL_SNAPSHOT
Definition: snapbuild.h:39
@ SNAPBUILD_CONSISTENT
Definition: snapbuild.h:46
bool FirstSnapshotSet
Definition: snapmgr.c:149
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1123
struct SnapshotData SnapshotData
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:109
PGPROC * MyProc
Definition: proc.c:68
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1222
Definition: dirent.c:26
TransactionId xmin
Definition: proc.h:176
XLogRecPtr restart_decoding_lsn
TransactionId xid
dlist_head toplevel_by_lsn
XLogRecPtr current_restart_decoding_lsn
SnapBuild builder
Definition: snapbuild.c:1459
pg_crc32c checksum
Definition: snapbuild.c:1449
XLogRecPtr start_decoding_at
Definition: snapbuild.c:165
SnapBuildState state
Definition: snapbuild.c:150
TransactionId xmin
Definition: snapbuild.c:156
TransactionId initial_xmin_horizon
Definition: snapbuild.c:182
struct SnapBuild::@16 committed
TransactionId xmax
Definition: snapbuild.c:159
TransactionId * xip
Definition: snapbuild.c:242
Snapshot snapshot
Definition: snapbuild.c:190
XLogRecPtr two_phase_at
Definition: snapbuild.c:176
bool building_full_snapshot
Definition: snapbuild.c:185
TransactionId next_phase_at
Definition: snapbuild.c:207
size_t xcnt
Definition: snapbuild.c:216
XLogRecPtr last_serialized_snapshot
Definition: snapbuild.c:195
size_t xcnt_space
Definition: snapbuild.c:219
bool includes_all_transactions
Definition: snapbuild.c:226
MemoryContext context
Definition: snapbuild.c:153
ReorderBuffer * reorder
Definition: snapbuild.c:200
TransactionId xmin
Definition: snapshot.h:157
int32 subxcnt
Definition: snapshot.h:181
bool copied
Definition: snapshot.h:185
uint32 regd_count
Definition: snapshot.h:205
uint32 active_count
Definition: snapshot.h:204
CommandId curcid
Definition: snapshot.h:187
uint32 xcnt
Definition: snapshot.h:169
TransactionId * subxip
Definition: snapshot.h:180
uint64 snapXactCompletionCount
Definition: snapshot.h:216
TransactionId xmax
Definition: snapshot.h:158
SnapshotType snapshot_type
Definition: snapshot.h:144
TransactionId * xip
Definition: snapshot.h:168
bool suboverflowed
Definition: snapshot.h:182
bool takenDuringRecovery
Definition: snapshot.h:184
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
dlist_node * cur
Definition: ilist.h:161
unsigned short st_mode
Definition: win32_port.h:268
CommandId cmin
Definition: heapam_xlog.h:368
CommandId combocid
Definition: heapam_xlog.h:370
RelFileNode target_node
Definition: heapam_xlog.h:375
ItemPointerData target_tid
Definition: heapam_xlog.h:376
TransactionId top_xid
Definition: heapam_xlog.h:367
CommandId cmax
Definition: heapam_xlog.h:369
TransactionId oldestRunningXid
Definition: standbydefs.h:53
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
TransactionId nextXid
Definition: standbydefs.h:52
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:273
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:292
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:307
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:322
#define InvalidTransactionId
Definition: transam.h:31
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:147
#define NormalTransactionIdFollows(id1, id2)
Definition: transam.h:152
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define TransactionIdAdvance(dest)
Definition: transam.h:91
@ WAIT_EVENT_SNAPBUILD_SYNC
Definition: wait_event.h:210
@ WAIT_EVENT_SNAPBUILD_WRITE
Definition: wait_event.h:211
@ WAIT_EVENT_SNAPBUILD_READ
Definition: wait_event.h:209
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:266
static void pgstat_report_wait_end(void)
Definition: wait_event.h:282
#define stat
Definition: win32_port.h:283
#define lstat(path, sb)
Definition: win32_port.h:284
#define S_ISREG(m)
Definition: win32_port.h:327
bool IsTransactionOrTransactionBlock(void)
Definition: xact.c:4784
bool XactReadOnly
Definition: xact.c:81
bool IsTransactionState(void)
Definition: xact.c:374
void StartTransactionCommand(void)
Definition: xact.c:2925
int XactIsoLevel
Definition: xact.c:78
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:922
void AbortCurrentTransaction(void)
Definition: xact.c:3293
#define XACT_REPEATABLE_READ
Definition: xact.h:38
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:136
bool RecoveryInProgress(void)
Definition: xlog.c:5762
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:5865
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28