PostgreSQL Source Code  git master
snapbuild.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapbuild.c
4  *
5  * Infrastructure for building historic catalog snapshots based on contents
6  * of the WAL, for the purpose of decoding heapam.c style values in the
7  * WAL.
8  *
9  * NOTES:
10  *
11  * We build snapshots which can *only* be used to read catalog contents and we
12  * do so by reading and interpreting the WAL stream. The aim is to build a
13  * snapshot that behaves the same as a freshly taken MVCC snapshot would have
14  * at the time the XLogRecord was generated.
15  *
16  * To build the snapshots we reuse the infrastructure built for Hot
17  * Standby. The in-memory snapshots we build look different than HS' because
18  * we have different needs. To successfully decode data from the WAL we only
19  * need to access catalog tables and (sys|rel|cat)cache, not the actual user
20  * tables since the data we decode is wholly contained in the WAL
21  * records. Also, our snapshots need to be different in comparison to normal
22  * MVCC ones because in contrast to those we cannot fully rely on the clog and
23  * pg_subtrans for information about committed transactions because they might
24  * commit in the future from the POV of the WAL entry we're currently
25  * decoding. This definition has the advantage that we only need to prevent
26  * removal of catalog rows, while normal table's rows can still be
27  * removed. This is achieved by using the replication slot mechanism.
28  *
29  * As the percentage of transactions modifying the catalog normally is fairly
30  * small in comparison to ones only manipulating user data, we keep track of
31  * the committed catalog modifying ones inside [xmin, xmax) instead of keeping
32  * track of all running transactions like it's done in a normal snapshot. Note
33  * that we're generally only looking at transactions that have acquired an
34  * xid. That is we keep a list of transactions between snapshot->(xmin, xmax)
35  * that we consider committed, everything else is considered aborted/in
36  * progress. That also allows us not to care about subtransactions before they
37  * have committed which means this module, in contrast to HS, doesn't have to
38  * care about suboverflowed subtransactions and similar.
39  *
40  * One complexity of doing this is that to e.g. handle mixed DDL/DML
41  * transactions we need Snapshots that see intermediate versions of the
42  * catalog in a transaction. During normal operation this is achieved by using
43  * CommandIds/cmin/cmax. The problem with that however is that for space
44  * efficiency reasons only one value of that is stored
45  * (cf. combocid.c). Since ComboCids are only available in memory we log
46  * additional information which allows us to get the original (cmin, cmax)
47  * pair during visibility checks. Check the reorderbuffer.c's comment above
48  * ResolveCminCmaxDuringDecoding() for details.
49  *
50  * To facilitate all this we need our own visibility routine, as the normal
51  * ones are optimized for different use cases.
52  *
53  * To replace the normal catalog snapshots with decoding ones use the
54  * SetupHistoricSnapshot() and TeardownHistoricSnapshot() functions.
55  *
56  *
57  *
58  * The snapbuild machinery is starting up in several stages, as illustrated
59  * by the following graph describing the SnapBuild->state transitions:
60  *
61  * +-------------------------+
62  * +----| START |-------------+
63  * | +-------------------------+ |
64  * | | |
65  * | | |
66  * | running_xacts #1 |
67  * | | |
68  * | | |
69  * | v |
70  * | +-------------------------+ v
71  * | | BUILDING_SNAPSHOT |------------>|
72  * | +-------------------------+ |
73  * | | |
74  * | | |
75  * | running_xacts #2, xacts from #1 finished |
76  * | | |
77  * | | |
78  * | v |
79  * | +-------------------------+ v
80  * | | FULL_SNAPSHOT |------------>|
81  * | +-------------------------+ |
82  * | | |
83  * running_xacts | saved snapshot
84  * with zero xacts | at running_xacts's lsn
85  * | | |
86  * | running_xacts with xacts from #2 finished |
87  * | | |
88  * | v |
89  * | +-------------------------+ |
90  * +--->|SNAPBUILD_CONSISTENT |<------------+
91  * +-------------------------+
92  *
93  * Initially the machinery is in the START stage. When an xl_running_xacts
94  * record is read that is sufficiently new (above the safe xmin horizon),
95  * there's a state transition. If there were no running xacts when the
96  * running_xacts record was generated, we'll directly go into CONSISTENT
97  * state, otherwise we'll switch to the BUILDING_SNAPSHOT state. Having a full
98  * snapshot means that all transactions that start henceforth can be decoded
99  * in their entirety, but transactions that started previously can't. In
100  * FULL_SNAPSHOT we'll switch into CONSISTENT once all those previously
101  * running transactions have committed or aborted.
102  *
103  * Only transactions that commit after CONSISTENT state has been reached will
104  * be replayed, even though they might have started while still in
105  * FULL_SNAPSHOT. That ensures that we'll reach a point where no previous
106  * changes have been exported, but all the following ones will be. That point
107  * is a convenient point to initialize replication from, which is why we
108  * export a snapshot at that point, which *can* be used to read normal data.
109  *
110  * Copyright (c) 2012-2019, PostgreSQL Global Development Group
111  *
112  * IDENTIFICATION
113  * src/backend/replication/snapbuild.c
114  *
115  *-------------------------------------------------------------------------
116  */
117 
118 #include "postgres.h"
119 
120 #include <sys/stat.h>
121 #include <unistd.h>
122 
123 #include "access/heapam_xlog.h"
124 #include "access/transam.h"
125 #include "access/xact.h"
126 #include "miscadmin.h"
127 #include "pgstat.h"
128 #include "replication/logical.h"
130 #include "replication/snapbuild.h"
131 #include "storage/block.h" /* debugging output */
132 #include "storage/fd.h"
133 #include "storage/lmgr.h"
134 #include "storage/proc.h"
135 #include "storage/procarray.h"
136 #include "storage/standby.h"
137 #include "utils/builtins.h"
138 #include "utils/memutils.h"
139 #include "utils/snapmgr.h"
140 #include "utils/snapshot.h"
141 
142 /*
143  * This struct contains the current state of the snapshot building
144  * machinery. Besides a forward declaration in the header, it is not exposed
145  * to the public, so we can easily change its contents.
146  */
147 struct SnapBuild
148 {
149  /* how far along are we in building our first full snapshot */
151 
152  /* private memory context used to allocate memory for this module. */
154 
155  /* all transactions < this have committed/aborted */
157 
158  /* all transactions >= this are uncommitted */
160 
161  /*
162  * Don't replay commits from an LSN < this LSN. This can be set externally
163  * but it will also be advanced (never retreat) from within snapbuild.c.
164  */
166 
167  /*
168  * Don't start decoding WAL until the "xl_running_xacts" information
169  * indicates there are no running xids with an xid smaller than this.
170  */
172 
173  /* Indicates if we are building a full snapshot or just a catalog one. */
175 
176  /*
177  * Snapshot that's valid to see the catalog state seen at this moment.
178  */
180 
181  /*
182  * LSN of the last location we are sure a snapshot has been serialized to.
183  */
185 
186  /*
187  * The reorderbuffer we need to update with usable snapshots et al.
188  */
190 
191  /*
192  * Outdated: This struct isn't used for its original purpose anymore, but
193  * can't be removed / changed in a minor version, because it's stored
194  * on-disk.
195  */
196  struct
197  {
198  /*
199  * NB: This field is misused, until a major version can break on-disk
200  * compatibility. See SnapBuildNextPhaseAt() /
201  * SnapBuildStartNextPhaseAt().
202  */
205 
206  size_t was_xcnt; /* number of used xip entries */
207  size_t was_xcnt_space; /* allocated size of xip */
208  TransactionId *was_xip; /* running xacts array, xidComparator-sorted */
209  } was_running;
210 
211  /*
212  * Array of transactions which could have catalog changes that committed
213  * between xmin and xmax.
214  */
215  struct
216  {
217  /* number of entries currently used in the committed-xid array */
218  size_t xcnt;
219 
220  /* allocated capacity of the committed-xid array, in entries */
221  size_t xcnt_space;
222 
223  /*
224  * Until we reach a CONSISTENT state, we record commits of all
225  * transactions, not just the catalog changing ones. Record when that
226  * changes so we know we cannot export a snapshot safely anymore.
227  */
229 
230  /*
231  * Array of committed transactions that have modified the catalog.
232  *
233  * As this array is frequently modified we do *not* keep it in
234  * xidComparator order. Instead we sort the array when building &
235  * distributing a snapshot.
236  *
237  * TODO: It's unclear whether that reasoning has much merit. Every
238  * time we add something here after becoming consistent will also
239  * require distributing a snapshot. Storing them sorted would
240  * potentially also make it easier to purge (but more complicated wrt
241  * wraparound?). Should be improved if sorting while building the
242  * snapshot shows up in profiles.
243  */
245  } committed;
246 };
247 
248 /*
249  * Starting a transaction -- which we need to do while exporting a snapshot --
250  * removes knowledge about the previously used resowner, so we save it here.
251  */
/* Set to true when a snapshot export starts, back to false once it is cleared. */
253 static bool ExportInProgress = false;
254 
255 /* ->committed manipulation */
256 static void SnapBuildPurgeCommittedTxn(SnapBuild *builder);
257 
258 /* snapshot building/manipulation/distribution functions */
259 static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder);
260 
261 static void SnapBuildFreeSnapshot(Snapshot snap);
262 
263 static void SnapBuildSnapIncRefcount(Snapshot snap);
264 
266 
267 /* xlog reading helper functions for SnapBuildProcessRunningXacts */
268 static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
269 static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff);
270 
271 /* serialization functions */
272 static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
273 static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn);
274 
275 /*
276  * Return the TransactionId after which the next phase of initial snapshot
277  * building will happen. The value is read from builder->was_running.was_xmax.
278  */
279 static inline TransactionId
281 {
282  /*
283  * For backward compatibility reasons this has to be stored in the wrongly
284  * named field. Will be fixed in next major version.
285  */
286  return builder->was_running.was_xmax;
287 }
288 
289 /*
290  * Set the TransactionId after which the next phase of initial snapshot
291  * building will happen. 'at' is stored in builder->was_running.was_xmax.
292  */
293 static inline void
295 {
296  /*
297  * For backward compatibility reasons this has to be stored in the wrongly
298  * named field. Will be fixed in next major version.
299  */
300  builder->was_running.was_xmax = at;
301 }
302 
303 /*
304  * Allocate a new snapshot builder.
305  *
306  * xmin_horizon is the xid >= which we can be sure no catalog rows have been
307  * removed, start_lsn is the LSN from which on we want to replay commits, and
 * need_full_snapshot says whether a full snapshot rather than just a catalog
 * one is to be built (it is stored in builder->building_full_snapshot).
 *
 * The returned builder starts out in SNAPBUILD_START state with an empty
 * ->committed array and includes_all_transactions set.
308  */
309 SnapBuild *
311  TransactionId xmin_horizon,
312  XLogRecPtr start_lsn,
313  bool need_full_snapshot)
314 {
316  MemoryContext oldcontext;
317  SnapBuild *builder;
318 
319  /* allocate memory in own context, to have better accountability */
321  "snapshot builder context",
323  oldcontext = MemoryContextSwitchTo(context);
324 
325  builder = palloc0(sizeof(SnapBuild));
326 
327  builder->state = SNAPBUILD_START;
328  builder->context = context;
329  builder->reorder = reorder;
330  /* Other struct members initialized by zeroing via palloc0 above */
331 
332  builder->committed.xcnt = 0;
333  builder->committed.xcnt_space = 128; /* arbitrary initial capacity, in entries */
334  builder->committed.xip =
335  palloc0(builder->committed.xcnt_space * sizeof(TransactionId));
336  builder->committed.includes_all_transactions = true;
337 
338  builder->initial_xmin_horizon = xmin_horizon;
339  builder->start_decoding_at = start_lsn;
340  builder->building_full_snapshot = need_full_snapshot;
341 
342  MemoryContextSwitchTo(oldcontext);
343 
344  return builder;
345 }
346 
347 /*
348  * Free a snapshot builder, including everything allocated in its private
 * memory context.
349  */
350 void
352 {
353  MemoryContext context = builder->context;
354 
355  /* free snapshot explicitly, that contains some error checking */
356  if (builder->snapshot != NULL)
357  {
359  builder->snapshot = NULL;
360  }
361 
362  /* other resources are deallocated by deleting the memory context */
363  MemoryContextDelete(context);
364 }
365 
366 /*
367  * Free an unreferenced snapshot that has previously been built by us.
 *
 * Raises an ERROR instead of freeing if the snapshot is marked as copied or
 * still has a nonzero active_count.
368  */
369 static void
371 {
372  /* make sure we don't get passed an external snapshot */
374 
375  /* make sure nobody modified our snapshot */
376  Assert(snap->curcid == FirstCommandId);
377  Assert(!snap->suboverflowed);
378  Assert(!snap->takenDuringRecovery);
379  Assert(snap->regd_count == 0);
380 
381  /* slightly more likely, so it's checked even in builds without assertions */
382  if (snap->copied)
383  elog(ERROR, "cannot free a copied snapshot");
384 
385  if (snap->active_count)
386  elog(ERROR, "cannot free an active snapshot");
387 
388  pfree(snap);
389 }
390 
391 /*
392  * In which state of snapshot building are we? Returns builder->state.
393  */
396 {
397  return builder->state;
398 }
399 
400 /*
401  * Should the contents of the transaction ending at 'ptr' be skipped, i.e.
 * not be decoded?
 *
 * Returns true if 'ptr' lies before builder->start_decoding_at, the LSN
 * below which commits are not replayed.
402  */
403 bool
405 {
406  return ptr < builder->start_decoding_at;
407 }
408 
409 /*
410  * Increase refcount of a snapshot.
411  *
412  * This is used when handing out a snapshot to some external resource or when
413  * adding a Snapshot as builder->snapshot.
 *
 * The reference count is kept in snap->active_count.
414  */
415 static void
417 {
418  snap->active_count++;
419 }
420 
421 /*
422  * Decrease refcount of a snapshot and free if the refcount reaches zero.
423  *
424  * Externally visible, so that external resources that have been handed an
425  * IncRef'ed Snapshot can adjust its refcount easily.
 *
 * The refcount is snap->active_count. Raises an ERROR if the snapshot is
 * marked as copied.
426  */
427 void
429 {
430  /* make sure we don't get passed an external snapshot */
432 
433  /* make sure nobody modified our snapshot */
434  Assert(snap->curcid == FirstCommandId);
435  Assert(!snap->suboverflowed);
436  Assert(!snap->takenDuringRecovery);
437 
438  Assert(snap->regd_count == 0);
439 
440  Assert(snap->active_count > 0);
441 
442  /* slightly more likely, so it's checked even in builds without assertions */
443  if (snap->copied)
444  elog(ERROR, "cannot free a copied snapshot");
445 
446  snap->active_count--;
447  if (snap->active_count == 0)
448  SnapBuildFreeSnapshot(snap);
449 }
450 
451 /*
452  * Build a new snapshot, based on currently committed catalog-modifying
453  * transactions.
454  *
455  * In-progress transactions with catalog access are *not* allowed to modify
456  * these snapshots; they have to copy them and fill in appropriate ->curcid
457  * and ->subxip/subxcnt values.
 *
 * The snapshot is allocated in builder->context and returned with both
 * active_count and regd_count at zero.
458  */
459 static Snapshot
461 {
463  Size ssize;
464 
465  Assert(builder->state >= SNAPBUILD_FULL_SNAPSHOT);
466 
 /*
 * One allocation holds the SnapshotData struct followed by the xid array:
 * room for every committed xid plus one extra slot.
 */
467  ssize = sizeof(SnapshotData)
468  + sizeof(TransactionId) * builder->committed.xcnt
469  + sizeof(TransactionId) * 1 /* toplevel xid */ ;
470 
471  snapshot = MemoryContextAllocZero(builder->context, ssize);
472 
474 
475  /*
476  * We misuse the original meaning of SnapshotData's xip and subxip fields
477  * to make them more fitting for our needs.
478  *
479  * In the 'xip' array we store transactions that have to be treated as
480  * committed. Since we will only ever look at tuples from transactions
481  * that have modified the catalog it's more efficient to store those few
482  * that exist between xmin and xmax (frequently there are none).
483  *
484  * Snapshots that are used in transactions that have modified the catalog
485  * also use the 'subxip' array to store their toplevel xid and all the
486  * subtransaction xids so we can recognize when we need to treat rows as
487  * visible that are not in xip but still need to be visible. Subxip only
488  * gets filled when the transaction is copied into the context of a
489  * catalog modifying transaction since we otherwise share a snapshot
490  * between transactions. As long as a txn hasn't modified the catalog it
491  * doesn't need to treat any uncommitted rows as visible, so there is no
492  * need for those xids.
493  *
494  * Both arrays are qsort'ed so that we can use bsearch() on them.
495  */
496  Assert(TransactionIdIsNormal(builder->xmin));
497  Assert(TransactionIdIsNormal(builder->xmax));
498 
499  snapshot->xmin = builder->xmin;
500  snapshot->xmax = builder->xmax;
501 
502  /* store all transactions to be treated as committed by this snapshot */
 /* xip points directly behind the struct, into the same allocation */
503  snapshot->xip =
504  (TransactionId *) ((char *) snapshot + sizeof(SnapshotData));
505  snapshot->xcnt = builder->committed.xcnt;
506  memcpy(snapshot->xip,
507  builder->committed.xip,
508  builder->committed.xcnt * sizeof(TransactionId));
509 
510  /* sort so we can bsearch() */
511  qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator);
512 
513  /*
514  * Initially, subxip is empty, i.e. it's a snapshot to be used by
515  * transactions that don't modify the catalog. Will be filled by
516  * ReorderBufferCopySnap() if necessary.
517  */
518  snapshot->subxcnt = 0;
519  snapshot->subxip = NULL;
520 
521  snapshot->suboverflowed = false;
522  snapshot->takenDuringRecovery = false;
523  snapshot->copied = false;
524  snapshot->curcid = FirstCommandId;
525  snapshot->active_count = 0;
526  snapshot->regd_count = 0;
527 
528  return snapshot;
529 }
530 
531 /*
532  * Build the initial slot snapshot and convert it to a normal snapshot that
533  * is understood by HeapTupleSatisfiesMVCC.
534  *
535  * The snapshot will be usable directly in current transaction or exported
536  * for loading in different transaction.
 *
 * Raises an ERROR unless the builder is in SNAPBUILD_CONSISTENT state and
 * still tracks all transactions, and also if the resulting list of
 * in-progress xids would exceed GetMaxSnapshotXidCount() entries.
 * As a side effect MyPgXact->xmin is set to the snapshot's xmin.
537  */
538 Snapshot
540 {
541  Snapshot snap;
542  TransactionId xid;
543  TransactionId *newxip;
544  int newxcnt = 0;
545 
548 
549  if (builder->state != SNAPBUILD_CONSISTENT)
550  elog(ERROR, "cannot build an initial slot snapshot before reaching a consistent state");
551 
552  if (!builder->committed.includes_all_transactions)
553  elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore");
554 
555  /* so we don't overwrite the existing value */
557  elog(ERROR, "cannot build an initial slot snapshot when MyPgXact->xmin already is valid");
558 
559  snap = SnapBuildBuildSnapshot(builder);
560 
561  /*
562  * We know that snap->xmin is alive, enforced by the logical xmin
563  * mechanism. Due to that we can do this without locks, we're only
564  * changing our own value.
565  */
566 #ifdef USE_ASSERT_CHECKING
567  {
568  TransactionId safeXid;
569 
570  LWLockAcquire(ProcArrayLock, LW_SHARED);
571  safeXid = GetOldestSafeDecodingTransactionId(false);
572  LWLockRelease(ProcArrayLock);
573 
574  Assert(TransactionIdPrecedesOrEquals(safeXid, snap->xmin));
575  }
576 #endif
577 
578  MyPgXact->xmin = snap->xmin;
579 
580  /* allocate in transaction context */
581  newxip = (TransactionId *)
583 
584  /*
585  * snapbuild.c builds snapshots in an "inverted" manner, which means it
586  * stores committed transactions in ->xip, not ones in progress. Build a
587  * classical snapshot by marking all non-committed transactions as
588  * in-progress. This can be expensive.
589  */
590  for (xid = snap->xmin; NormalTransactionIdPrecedes(xid, snap->xmax);)
591  {
592  void *test;
593 
594  /*
595  * Check whether transaction committed using the decoding snapshot
596  * meaning of ->xip.
597  */
598  test = bsearch(&xid, snap->xip, snap->xcnt,
599  sizeof(TransactionId), xidComparator);
600 
 /* not found among the committed ones, so treat it as in progress */
601  if (test == NULL)
602  {
603  if (newxcnt >= GetMaxSnapshotXidCount())
604  ereport(ERROR,
605  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
606  errmsg("initial slot snapshot too large")));
607 
608  newxip[newxcnt++] = xid;
609  }
610 
612  }
613 
614  /* adjust remaining snapshot fields as needed */
616  snap->xcnt = newxcnt;
617  snap->xip = newxip;
618 
619  return snap;
620 }
621 
622 /*
623  * Export a snapshot so it can be set in another session with SET TRANSACTION
624  * SNAPSHOT.
625  *
626  * For that we need to start a transaction in the current backend as the
627  * importing side checks whether the source transaction is still open to make
628  * sure the xmin horizon hasn't advanced since then.
 *
 * Returns the name handed back by ExportSnapshot(). Only one export can be
 * outstanding at a time; a second attempt raises an ERROR.
629  */
630 const char *
632 {
633  Snapshot snap;
634  char *snapname;
635 
637  elog(ERROR, "cannot export a snapshot from within a transaction");
638 
639  if (SavedResourceOwnerDuringExport)
640  elog(ERROR, "can only export one snapshot at a time");
641 
 /* remember the current resource owner and flag the export as outstanding */
642  SavedResourceOwnerDuringExport = CurrentResourceOwner;
643  ExportInProgress = true;
644 
646 
647  /* There doesn't seem to be a nice API to set these */
649  XactReadOnly = true;
650 
651  snap = SnapBuildInitialSnapshot(builder);
652 
653  /*
654  * now that we've built a plain snapshot, make it active and use the
655  * normal mechanisms for exporting it
656  */
657  snapname = ExportSnapshot(snap);
658 
659  ereport(LOG,
660  (errmsg_plural("exported logical decoding snapshot: \"%s\" with %u transaction ID",
661  "exported logical decoding snapshot: \"%s\" with %u transaction IDs",
662  snap->xcnt,
663  snapname, snap->xcnt)));
664  return snapname;
665 }
666 
667 /*
668  * Ensure there is a snapshot and if not build one for current transaction.
 *
 * Returns builder->snapshot, building it first if it is still NULL. The
 * builder must already be in SNAPBUILD_CONSISTENT state.
669  */
670 Snapshot
672 {
673  Assert(builder->state == SNAPBUILD_CONSISTENT);
674 
675  /* only build a new snapshot if we don't have a prebuilt one */
676  if (builder->snapshot == NULL)
677  {
678  builder->snapshot = SnapBuildBuildSnapshot(builder);
679  /* increase refcount for the snapshot builder */
681  }
682 
683  return builder->snapshot;
684 }
685 
686 /*
687  * Reset a previously SnapBuildExportSnapshot()'ed snapshot if there is
688  * any. Aborts the previously started transaction and resets the resource
689  * owner back to its original value.
 *
 * Does nothing if no export is in progress; raises an ERROR if an export is
 * in progress but we are not in a transaction state.
690  */
691 void
693 {
694  /* nothing exported, that is the usual case */
695  if (!ExportInProgress)
696  return;
697 
698  if (!IsTransactionState())
699  elog(ERROR, "clearing exported snapshot in wrong transaction state");
700 
701  /* make sure nothing could have ever happened */
703 
 /* forget the saved resource owner and mark the export as finished */
705  SavedResourceOwnerDuringExport = NULL;
706  ExportInProgress = false;
707 }
708 
709 /*
710  * Handle the effects of a single heap change, appropriate to the current state
711  * of the snapshot builder, and return whether changes made at (xid, lsn) can
712  * be decoded.
 *
 * When returning true, the transaction is guaranteed to have a base snapshot
 * in the reorderbuffer; one is set here if it did not have one yet.
713  */
714 bool
716 {
717  /*
718  * We can't handle data in transactions if we haven't built a snapshot
719  * yet, so don't store them.
720  */
721  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
722  return false;
723 
724  /*
725  * No point in keeping track of changes in transactions that we don't have
726  * enough information about to decode. This means that they started before
727  * we got into the SNAPBUILD_FULL_SNAPSHOT state.
728  */
729  if (builder->state < SNAPBUILD_CONSISTENT &&
731  return false;
732 
733  /*
734  * If the reorderbuffer doesn't yet have a snapshot, add one now, it will
735  * be needed to decode the change we're currently processing.
736  */
737  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
738  {
739  /* only build a new snapshot if we don't have a prebuilt one */
740  if (builder->snapshot == NULL)
741  {
742  builder->snapshot = SnapBuildBuildSnapshot(builder);
743  /* increase refcount for the snapshot builder */
745  }
746 
747  /*
748  * Increase refcount for the transaction we're handing the snapshot
749  * out to.
750  */
752  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
753  builder->snapshot);
754  }
755 
756  return true;
757 }
758 
759 /*
760  * Do CommandId/ComboCid handling after reading an xl_heap_new_cid record.
761  * This implies that a transaction has done some form of write to system
762  * catalogs.
 *
 * The transaction is marked as having catalog changes, the record's
 * (cmin, cmax, combocid) mapping is passed on to the reorderbuffer, and a new
 * command id is queued that is one past the larger valid value of cmin and
 * cmax. A record in which neither is valid raises an ERROR.
763  */
764 void
766  XLogRecPtr lsn, xl_heap_new_cid *xlrec)
767 {
768  CommandId cid;
769 
770  /*
771  * we only log new_cid's if a catalog tuple was modified, so mark the
772  * transaction as containing catalog modifications
773  */
774  ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
775 
776  ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
777  xlrec->target_node, xlrec->target_tid,
778  xlrec->cmin, xlrec->cmax,
779  xlrec->combocid);
780 
781  /* figure out new command id: the larger of cmin/cmax, or whichever is valid */
782  if (xlrec->cmin != InvalidCommandId &&
783  xlrec->cmax != InvalidCommandId)
784  cid = Max(xlrec->cmin, xlrec->cmax);
785  else if (xlrec->cmax != InvalidCommandId)
786  cid = xlrec->cmax;
787  else if (xlrec->cmin != InvalidCommandId)
788  cid = xlrec->cmin;
789  else
790  {
791  cid = InvalidCommandId; /* silence compiler */
792  elog(ERROR, "xl_heap_new_cid record without a valid CommandId");
793  }
794 
795  ReorderBufferAddNewCommandId(builder->reorder, xid, lsn, cid + 1);
796 }
797 
798 /*
799  * Add a new Snapshot to all transactions we're decoding that currently are
800  * in-progress so they can see new catalog contents made by the transaction
801  * that just committed. This is necessary because those in-progress
802  * transactions will use the new catalog's contents from here on (at the very
803  * least everything they do needs to be compatible with newer catalog
804  * contents).
 *
 * The snapshot handed out is builder->snapshot; transactions that have no
 * base snapshot yet are skipped.
805  */
806 static void
808 {
809  dlist_iter txn_i;
810  ReorderBufferTXN *txn;
811 
812  /*
813  * Iterate through all toplevel transactions. This can include
814  * subtransactions which we just don't yet know to be that, but that's
815  * fine, they will just get an unnecessary snapshot queued.
816  */
817  dlist_foreach(txn_i, &builder->reorder->toplevel_by_lsn)
818  {
819  txn = dlist_container(ReorderBufferTXN, node, txn_i.cur);
820 
822 
823  /*
824  * If we don't have a base snapshot yet, there are no changes in this
825  * transaction which in turn implies we don't yet need a snapshot at
826  * all. We'll add a snapshot when the first change gets queued.
827  *
828  * NB: This works correctly even for subtransactions because
829  * ReorderBufferAssignChild() takes care to transfer the base snapshot
830  * to the top-level transaction, and while iterating the changequeue
831  * we'll get the change from the subtxn.
832  */
833  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid))
834  continue;
835 
836  elog(DEBUG2, "adding a new snapshot to %u at %X/%X",
837  txn->xid, (uint32) (lsn >> 32), (uint32) lsn);
838 
839  /*
840  * increase the snapshot's refcount for the transaction we are handing
841  * it out to
842  */
844  ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn,
845  builder->snapshot);
846  }
847 }
848 
849 /*
850  * Keep track of a new catalog changing transaction that has committed.
 *
 * xid is appended to builder->committed.xip, which is not kept sorted. When
 * the array is full it is first enlarged to twice its size plus one entry.
851  */
852 static void
854 {
856 
857  if (builder->committed.xcnt == builder->committed.xcnt_space)
858  {
859  builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1;
860 
861  elog(DEBUG1, "increasing space for committed transactions to %u",
862  (uint32) builder->committed.xcnt_space);
863 
864  builder->committed.xip = repalloc(builder->committed.xip,
865  builder->committed.xcnt_space * sizeof(TransactionId));
866  }
867 
868  /*
869  * TODO: It might make sense to keep the array sorted here instead of
870  * doing it every time we build a new snapshot. On the other hand this
871  * gets called repeatedly when a transaction with subtransactions commits.
872  */
873  builder->committed.xip[builder->committed.xcnt++] = xid;
874 }
875 
876 /*
877  * Remove knowledge about transactions we treat as committed that are smaller
878  * than ->xmin. Those won't ever get checked via the ->committed array but via
879  * the clog machinery, so we don't need to waste memory on them.
 *
 * Does nothing as long as builder->xmin is not a normal xid. The relative
 * order of the surviving entries is preserved; the array's allocated size is
 * left unchanged.
880  */
881 static void
883 {
884  int off;
885  TransactionId *workspace;
886  int surviving_xids = 0;
887 
888  /* not ready yet */
889  if (!TransactionIdIsNormal(builder->xmin))
890  return;
891 
892  /* TODO: Neater algorithm than just copying and iterating? */
 /* scratch array large enough for the case that every xid survives */
893  workspace =
894  MemoryContextAlloc(builder->context,
895  builder->committed.xcnt * sizeof(TransactionId));
896 
897  /* copy xids that still are interesting to workspace */
898  for (off = 0; off < builder->committed.xcnt; off++)
899  {
900  if (NormalTransactionIdPrecedes(builder->committed.xip[off],
901  builder->xmin))
902  ; /* remove */
903  else
904  workspace[surviving_xids++] = builder->committed.xip[off];
905  }
906 
907  /* copy workspace back to persistent state */
908  memcpy(builder->committed.xip, workspace,
909  surviving_xids * sizeof(TransactionId));
910 
911  elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u",
912  (uint32) builder->committed.xcnt, (uint32) surviving_xids,
913  builder->xmin, builder->xmax);
914  builder->committed.xcnt = surviving_xids;
915 
916  pfree(workspace);
917 }
918 
919 /*
920  * Handle everything that needs to be done when a transaction commits.
 *
 * subxacts holds the nsubxacts subtransaction xids of the committing
 * transaction xid.
 *
 * Depending on the builder's state this advances start_decoding_at past lsn,
 * records the transaction and its subtransactions in ->committed, moves
 * ->xmax forward and, if catalog changes were involved, builds a new
 * builder->snapshot and hands it out.
921  */
922 void
924  int nsubxacts, TransactionId *subxacts)
925 {
926  int nxact;
927 
 /* needs_snapshot: a new snapshot has to be built and handed out */
928  bool needs_snapshot = false;
 /* needs_timetravel: set for catalog changes or a forced full snapshot */
929  bool needs_timetravel = false;
 /* sub_needs_timetravel: some subtransaction modified the catalog */
930  bool sub_needs_timetravel = false;
931 
 /* highest xid among the toplevel xid and the tracked subxids */
932  TransactionId xmax = xid;
933 
934  /*
935  * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor
936  * will they be part of a snapshot. So we don't need to record anything.
937  */
938  if (builder->state == SNAPBUILD_START ||
939  (builder->state == SNAPBUILD_BUILDING_SNAPSHOT &&
941  {
942  /* ensure that only commits after this are getting replayed */
943  if (builder->start_decoding_at <= lsn)
944  builder->start_decoding_at = lsn + 1;
945  return;
946  }
947 
948  if (builder->state < SNAPBUILD_CONSISTENT)
949  {
950  /* ensure that only commits after this are getting replayed */
951  if (builder->start_decoding_at <= lsn)
952  builder->start_decoding_at = lsn + 1;
953 
954  /*
955  * If building an exportable snapshot, force xid to be tracked, even
956  * if the transaction didn't modify the catalog.
957  */
958  if (builder->building_full_snapshot)
959  {
960  needs_timetravel = true;
961  }
962  }
963 
964  for (nxact = 0; nxact < nsubxacts; nxact++)
965  {
966  TransactionId subxid = subxacts[nxact];
967 
968  /*
969  * Add subtransaction to base snapshot if catalog modifying, we don't
970  * distinguish to toplevel transactions there.
971  */
972  if (ReorderBufferXidHasCatalogChanges(builder->reorder, subxid))
973  {
974  sub_needs_timetravel = true;
975  needs_snapshot = true;
976 
977  elog(DEBUG1, "found subtransaction %u:%u with catalog changes",
978  xid, subxid);
979 
980  SnapBuildAddCommittedTxn(builder, subxid);
981 
982  if (NormalTransactionIdFollows(subxid, xmax))
983  xmax = subxid;
984  }
985 
986  /*
987  * If we're forcing timetravel we also need visibility information
988  * about subtransaction, so keep track of subtransaction's state, even
989  * if not catalog modifying. Don't need to distribute a snapshot in
990  * that case.
991  */
992  else if (needs_timetravel)
993  {
994  SnapBuildAddCommittedTxn(builder, subxid);
995  if (NormalTransactionIdFollows(subxid, xmax))
996  xmax = subxid;
997  }
998  }
999 
1000  /* if top-level modified catalog, it'll need a snapshot */
1001  if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
1002  {
1003  elog(DEBUG2, "found top level transaction %u, with catalog changes",
1004  xid);
1005  needs_snapshot = true;
1006  needs_timetravel = true;
1007  SnapBuildAddCommittedTxn(builder, xid);
1008  }
1009  else if (sub_needs_timetravel)
1010  {
1011  /* track toplevel txn as well, subxact alone isn't meaningful */
1012  SnapBuildAddCommittedTxn(builder, xid);
1013  }
1014  else if (needs_timetravel)
1015  {
1016  elog(DEBUG2, "forced transaction %u to do timetravel", xid);
1017 
1018  SnapBuildAddCommittedTxn(builder, xid);
1019  }
1020 
1021  if (!needs_timetravel)
1022  {
1023  /* record that we cannot export a general snapshot anymore */
1024  builder->committed.includes_all_transactions = false;
1025  }
1026 
1027  Assert(!needs_snapshot || needs_timetravel);
1028 
1029  /*
1030  * Adjust xmax of the snapshot builder, we only do that for committed,
1031  * catalog modifying, transactions, everything else isn't interesting for
1032  * us since we'll never look at the respective rows.
1033  */
1034  if (needs_timetravel &&
1035  (!TransactionIdIsValid(builder->xmax) ||
1036  TransactionIdFollowsOrEquals(xmax, builder->xmax)))
1037  {
 /* builder->xmax ends up one past the highest xid seen here */
1038  builder->xmax = xmax;
1039  TransactionIdAdvance(builder->xmax);
1040  }
1041 
1042  /* if there's any reason to build a historic snapshot, do so now */
1043  if (needs_snapshot)
1044  {
1045  /*
1046  * If we haven't built a complete snapshot yet there's no need to hand
1047  * it out, it wouldn't (and couldn't) be used anyway.
1048  */
1049  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
1050  return;
1051 
1052  /*
1053  * Decrease the snapshot builder's refcount of the old snapshot, note
1054  * that it still will be used if it has been handed out to the
1055  * reorderbuffer earlier.
1056  */
1057  if (builder->snapshot)
1059 
1060  builder->snapshot = SnapBuildBuildSnapshot(builder);
1061 
1062  /* we might need to execute invalidations, add snapshot */
1063  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
1064  {
1066  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
1067  builder->snapshot);
1068  }
1069 
1070  /* refcount of the snapshot builder for the new snapshot */
1072 
1073  /* add a new catalog snapshot to all currently running transactions */
1075  }
1076 }
1077 
1078 
1079 /* -----------------------------------
1080  * Snapshot building functions dealing with xlog records
1081  * -----------------------------------
1082  */
1083 
1084 /*
1085  * Process a running xacts record, and use its information to first build a
1086  * historic snapshot and later to release resources that aren't needed
1087  * anymore.
      *
      * Sequence, as visible below: while the builder is not yet consistent the
      * record is handed to SnapBuildFindSnapshot(); once consistent the builder
      * state is serialized instead. Afterwards builder->xmin is refreshed from
      * the record, committed xids that are no longer needed are purged, the
      * slot's xmin is advanced and, only when consistent, a restart location for
      * decoding is derived from the oldest in-progress transaction or from the
      * last serialized snapshot.
      *
      * NOTE(review): this listing lacks the line carrying the function name and
      * parameter list (numbering jumps from 1089 to 1091); the body uses
      * 'builder', 'lsn' and 'running'. The declaration of the local 'xmin'
      * (presumably line 1093) and the statements following the conditions at
      * 1167 and 1174 are absent as well -- consult the real file before editing.
1088  */
1089 void
1091 {
1092  ReorderBufferTXN *txn;
1094 
1095  /*
1096  * If we're not consistent yet, inspect the record to see whether it
1097  * allows us to get closer to being consistent. If we are consistent, dump
1098  * our snapshot so others or we, after a restart, can use it.
1099  */
1100  if (builder->state < SNAPBUILD_CONSISTENT)
1101  {
1102  /* returns false if there's no point in performing cleanup just yet */
1103  if (!SnapBuildFindSnapshot(builder, lsn, running))
1104  return;
1105  }
1106  else
1107  SnapBuildSerialize(builder, lsn);
1108 
1109  /*
1110  * Update range of interesting xids based on the running xacts
1111  * information. We don't increase ->xmax using it, because once we are in
1112  * a consistent state we can do that ourselves and much more efficiently
1113  * so, because we only need to do it for catalog transactions since we
1114  * only ever look at those.
1115  *
1116  * NB: We only increase xmax when a catalog modifying transaction commits
1117  * (see SnapBuildCommitTxn). Because of this, xmax can be lower than
1118  * xmin, which looks odd but is correct and actually more efficient, since
1119  * we hit fast paths in heapam_visibility.c.
1120  */
1121  builder->xmin = running->oldestRunningXid;
1122 
1123  /* Remove transactions we don't need to keep track of anymore */
1124  SnapBuildPurgeCommittedTxn(builder);
1125 
1126  /*
1127  * Advance the xmin limit for the current replication slot, to allow
1128  * vacuum to clean up the tuples this slot has been protecting.
1129  *
1130  * The reorderbuffer might have an xmin among the currently running
1131  * snapshots; use it if so. If not, we need only consider the snapshots
1132  * we'll produce later, which can't be less than the oldest running xid in
1133  * the record we're reading now.
1134  */
1135  xmin = ReorderBufferGetOldestXmin(builder->reorder);
1136  if (xmin == InvalidTransactionId)
1137  xmin = running->oldestRunningXid;
1138  elog(DEBUG3, "xmin: %u, xmax: %u, oldest running: %u, oldest xmin: %u",
1139  builder->xmin, builder->xmax, running->oldestRunningXid, xmin);
1140  LogicalIncreaseXminForSlot(lsn, xmin);
1141 
1142  /*
1143  * Also tell the slot where we can restart decoding from. We don't want to
1144  * do that after every commit because changing that implies an fsync of
1145  * the logical slot's state file, so we only do it every time we see a
1146  * running xacts record.
1147  *
1148  * Do so by looking for the oldest in progress transaction (determined by
1149  * the first LSN of any of its relevant records). Every transaction
1150  * remembers the last location we stored the snapshot to disk before its
1151  * beginning. That point is where we can restart from.
1152  */
1153 
1154  /*
1155  * Can't know about a serialized snapshot's location if we're not
1156  * consistent.
1157  */
1158  if (builder->state < SNAPBUILD_CONSISTENT)
1159  return;
1160 
1161  txn = ReorderBufferGetOldestTXN(builder->reorder);
1162 
1163  /*
1164  * The oldest ongoing txn might have started when we didn't yet serialize
1165  * anything because we hadn't reached a consistent state yet; in that case
      * its restart_decoding_lsn is still invalid and this branch is skipped.
1166  */
1167  if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr)
1169 
1170  /*
1171  * No in-progress transaction, can reuse the last serialized snapshot if
1172  * we have one.
1173  */
1174  else if (txn == NULL &&
1178  builder->last_serialized_snapshot);
1179 }
1180 
1181 
1182 /*
1183  * Build the start of a snapshot that's capable of decoding the catalog.
1184  *
1185  * Helper function for SnapBuildProcessRunningXacts() while we're not yet
1186  * consistent.
1187  *
1188  * Returns true if there is a point in performing internal maintenance/cleanup
1189  * using the xl_running_xacts record.
      *
      * Concretely, false is returned when the builder jumped straight to
      * CONSISTENT because no transactions were running (case a) or because a
      * serialized snapshot was restored (case b); every other path, including
      * the "record too old" early exit, returns true.
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1191 to 1193); the caller passes
      * (builder, lsn, running). Several other lines (1221-1222, 1262, 1293, 1326,
      * 1351, 1355) are absent too, so some conditions and state assignments
      * appear truncated below.
1190  */
1191 static bool
1193 {
1194  /* ---
1195  * Build catalog decoding snapshot incrementally using information about
1196  * the currently running transactions. There are several ways to do that:
1197  *
1198  * a) There were no running transactions when the xl_running_xacts record
1199  * was inserted, jump to CONSISTENT immediately. We might find such a
1200  * state while waiting on c)'s sub-states.
1201  *
1202  * b) This (in a previous run) or another decoding slot serialized a
1203  * snapshot to disk that we can use. Can't use this method for the
1204  * initial snapshot when slot is being created and needs full snapshot
1205  * for export or direct use, as that snapshot will only contain catalog
1206  * modifying transactions.
1207  *
1208  * c) First incrementally build a snapshot for catalog tuples
1209  * (BUILDING_SNAPSHOT), that requires all, already in-progress,
1210  * transactions to finish. Every transaction starting after that
1211  * (FULL_SNAPSHOT state), has enough information to be decoded. But
1212  * for older running transactions no viable snapshot exists yet, so
1213  * CONSISTENT will only be reached once all of those have finished.
1214  * ---
1215  */
1216 
1217  /*
1218  * The xl_running_xacts record is older than what we can use, we might not
1219  * have all necessary catalog rows anymore.
1220  */
1223  builder->initial_xmin_horizon))
1224  {
1225  ereport(DEBUG1,
1226  (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
1227  (uint32) (lsn >> 32), (uint32) lsn),
1228  errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
1229  builder->initial_xmin_horizon, running->oldestRunningXid)));
1230 
1231 
1232  SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon);
1233 
1234  return true;
1235  }
1236 
1237  /*
1238  * a) No transactions were running, we can jump to consistent.
1239  *
1240  * This is not affected by races around xl_running_xacts, because we can
1241  * miss transaction commits, but currently not transactions starting.
1242  *
1243  * NB: We might have already started to incrementally assemble a snapshot,
1244  * so we need to be careful to deal with that.
1245  */
1246  if (running->oldestRunningXid == running->nextXid)
1247  {
1248  if (builder->start_decoding_at == InvalidXLogRecPtr ||
1249  builder->start_decoding_at <= lsn)
1250  /* can decode everything after this */
1251  builder->start_decoding_at = lsn + 1;
1252 
1253  /* As no transactions were running xmin/xmax can be trivially set. */
1254  builder->xmin = running->nextXid; /* < are finished */
1255  builder->xmax = running->nextXid; /* >= are running */
1256 
1257  /* so we can safely use the faster comparisons */
1258  Assert(TransactionIdIsNormal(builder->xmin));
1259  Assert(TransactionIdIsNormal(builder->xmax));
1260 
1261  builder->state = SNAPBUILD_CONSISTENT;
1263 
1264  ereport(LOG,
1265  (errmsg("logical decoding found consistent point at %X/%X",
1266  (uint32) (lsn >> 32), (uint32) lsn),
1267  errdetail("There are no running transactions.")));
1268 
1269  return false;
1270  }
1271  /* b) valid on disk state and not building full snapshot */
1272  else if (!builder->building_full_snapshot &&
1273  SnapBuildRestore(builder, lsn))
1274  {
1275  /* there won't be any state to cleanup */
1276  return false;
1277  }
1278 
1279  /*
1280  * c) transition from START to BUILDING_SNAPSHOT.
1281  *
1282  * In START state, and a xl_running_xacts record with running xacts is
1283  * encountered. In that case, switch to BUILDING_SNAPSHOT state, and
1284  * record xl_running_xacts->nextXid. Once all running xacts have finished
1285  * (i.e. they're all >= nextXid), we have a complete catalog snapshot. It
1286  * might look that we could use xl_running_xact's ->xids information to
1287  * get there quicker, but that is problematic because transactions marked
1288  * as running, might already have inserted their commit record - it's
1289  * infeasible to change that with locking.
1290  */
1291  else if (builder->state == SNAPBUILD_START)
1292  {
1294  SnapBuildStartNextPhaseAt(builder, running->nextXid);
1295 
1296  /*
1297  * Start with an xmin/xmax that's correct for future, when all the
1298  * currently running transactions have finished. We'll update both
1299  * while waiting for the pending transactions to finish.
1300  */
1301  builder->xmin = running->nextXid; /* < are finished */
1302  builder->xmax = running->nextXid; /* >= are running */
1303 
1304  /* so we can safely use the faster comparisons */
1305  Assert(TransactionIdIsNormal(builder->xmin));
1306  Assert(TransactionIdIsNormal(builder->xmax));
1307 
1308  ereport(LOG,
1309  (errmsg("logical decoding found initial starting point at %X/%X",
1310  (uint32) (lsn >> 32), (uint32) lsn),
1311  errdetail("Waiting for transactions (approximately %d) older than %u to end.",
1312  running->xcnt, running->nextXid)));
1313 
1314  SnapBuildWaitSnapshot(running, running->nextXid);
1315  }
1316 
1317  /*
1318  * c) transition from BUILDING_SNAPSHOT to FULL_SNAPSHOT.
1319  *
1320  * In BUILDING_SNAPSHOT state, and this xl_running_xacts' oldestRunningXid
1321  * is >= nextXid from when we switched to BUILDING_SNAPSHOT. This
1322  * means all transactions starting afterwards have enough information to
1323  * be decoded. Switch to FULL_SNAPSHOT.
1324  */
1325  else if (builder->state == SNAPBUILD_BUILDING_SNAPSHOT &&
1327  running->oldestRunningXid))
1328  {
1329  builder->state = SNAPBUILD_FULL_SNAPSHOT;
1330  SnapBuildStartNextPhaseAt(builder, running->nextXid);
1331 
1332  ereport(LOG,
1333  (errmsg("logical decoding found initial consistent point at %X/%X",
1334  (uint32) (lsn >> 32), (uint32) lsn),
1335  errdetail("Waiting for transactions (approximately %d) older than %u to end.",
1336  running->xcnt, running->nextXid)));
1337 
1338  SnapBuildWaitSnapshot(running, running->nextXid);
1339  }
1340 
1341  /*
1342  * c) transition from FULL_SNAPSHOT to CONSISTENT.
1343  *
1344  * In FULL_SNAPSHOT state (reached via the previous transition), and this
1345  * xl_running_xacts' oldestRunningXid is >= nextXid from when we switched to
1346  * FULL_SNAPSHOT. This means all transactions that are currently in
1347  * progress have a catalog snapshot, and all their changes have been
1348  * collected. Switch to CONSISTENT.
1349  */
1350  else if (builder->state == SNAPBUILD_FULL_SNAPSHOT &&
1352  running->oldestRunningXid))
1353  {
1354  builder->state = SNAPBUILD_CONSISTENT;
1356 
1357  ereport(LOG,
1358  (errmsg("logical decoding found consistent point at %X/%X",
1359  (uint32) (lsn >> 32), (uint32) lsn),
1360  errdetail("There are no old transactions anymore.")));
1361  }
1362 
1363  /*
1364  * We already started to track running xacts and need to wait for all
1365  * in-progress ones to finish. We fall through to the normal processing of
1366  * records so incremental cleanup can be performed.
1367  */
1368  return true;
1369 
1370 }
1371 
1372 /* ---
1373  * Iterate through xids in record, wait for all older than the cutoff to
1374  * finish. Then, if possible, log a new xl_running_xacts record.
1375  *
1376  * This isn't required for the correctness of decoding, but to:
1377  * a) allow isolationtester to notice that we're currently waiting for
1378  * something.
1379  * b) log a new xl_running_xacts record where it'd be helpful, without having
1380  * to wait for bgwriter or checkpointer.
      *
      * Only xids for which TransactionIdFollows(xid, cutoff) holds are skipped;
      * every other listed xid is waited for with XactLockTableWait().
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1383 to 1385); the body uses
      * 'running' and 'cutoff'. The condition guarding the "waiting for
      * ourselves" error (1397) and the statement inside the final block (1414)
      * are absent as well.
1381  * ---
1382  */
1383 static void
1385 {
1386  int off;
1387 
1388  for (off = 0; off < running->xcnt; off++)
1389  {
1390  TransactionId xid = running->xids[off];
1391 
1392  /*
1393  * Upper layers should prevent that we ever need to wait on ourselves.
1394  * Check anyway, since failing to do so would either result in an
1395  * endless wait or an Assert() failure.
1396  */
1398  elog(ERROR, "waiting for ourselves");
1399 
      /* transactions newer than the cutoff are of no interest here */
1400  if (TransactionIdFollows(xid, cutoff))
1401  continue;
1402 
1403  XactLockTableWait(xid, NULL, NULL, XLTW_None);
1404  }
1405 
1406  /*
1407  * All transactions we needed to finish finished - try to ensure there is
1408  * another xl_running_xacts record in a timely manner, without having to
1409  * wait for bgwriter or checkpointer to log one. During recovery we
1410  * can't enforce that, so we'll have to wait.
1411  */
1412  if (!RecoveryInProgress())
1413  {
1415  }
1416 }
1417 
1418 /* -----------------------------------
1419  * Snapshot serialization support
1420  * -----------------------------------
1421  */
1422 
1423 /*
1424  * We store current state of struct SnapBuild on disk in the following manner:
1425  *
1426  * struct SnapBuildOnDisk;
1427  * TransactionId * running.xcnt_space;
1428  * TransactionId * committed.xcnt; (*not xcnt_space*)
1429  *
      * NOTE(review): the member declarations themselves are absent from this
      * listing (lines 1436-1437, 1442, 1444 and 1447 are missing). Code in this
      * file accesses the members magic, checksum, version, length and builder;
      * the section comments below indicate how they are grouped.
1430  */
1431 typedef struct SnapBuildOnDisk
1432 {
1433  /* first part of this struct needs to be version independent */
1434 
1435  /* data not covered by checksum */
1438 
1439  /* data covered by checksum */
1440 
1441  /* version, in case we want to support pg_upgrade */
1443  /* how large is the on disk data, excluding the constant sized part */
1445 
1446  /* version dependent part */
1448 
1449  /* variable amount of TransactionIds follows */
1450 } SnapBuildOnDisk;
1451 
     /* size of the leading part of the struct that precedes 'builder' */
1452 #define SnapBuildOnDiskConstantSize \
1453  offsetof(SnapBuildOnDisk, builder)
     /* size of the leading part that precedes 'version'; skipped when checksumming */
1454 #define SnapBuildOnDiskNotChecksummedSize \
1455  offsetof(SnapBuildOnDisk, version)
1456 
     /* identification values written to, and verified against, state files */
1457 #define SNAPBUILD_MAGIC 0x51A1E001
1458 #define SNAPBUILD_VERSION 2
1459 
1459 
1460 /*
1461  * Store/Load a snapshot from disk, depending on the snapshot builder's state.
1462  *
1463  * Supposed to be used by external (i.e. not snapbuild.c) code that just read
1464  * a record that's a potential location for a serialized snapshot.
      *
      * A builder that has not reached SNAPBUILD_CONSISTENT tries to restore a
      * snapshot stored for 'lsn' (the result of that attempt is ignored here);
      * a consistent builder serializes its state at 'lsn' instead.
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1466 to 1468).
1465  */
1466 void
1468 {
1469  if (builder->state < SNAPBUILD_CONSISTENT)
1470  SnapBuildRestore(builder, lsn);
1471  else
1472  SnapBuildSerialize(builder, lsn);
1473 }
1474 
1475 /*
1476  * Serialize the snapshot 'builder' at the location 'lsn' if it hasn't already
1477  * been done by another decoding process.
      *
      * Outline of the visible steps: return early unless the builder is
      * consistent; if "pg_logical/snapshots/<hi>-<lo>.snap" already exists just
      * fsync it and remember 'lsn'; otherwise assemble the on-disk image
      * (header, SnapBuild copy with memory-only pointers cleared, committed
      * xids) together with its CRC, write it to a pid-specific temporary file,
      * fsync, rename it into place and fsync again. Failures are reported via
      * ereport(ERROR).
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1479 to 1481), and the first line
      * of every multi-line statement that carried a hyperlink is missing too
      * (e.g. 1493, 1521, 1577, 1680).
      * NOTE(review): no release of the 'ondisk' allocation is visible in this
      * listing -- confirm whether it is freed or intentionally left to the
      * memory context.
1478  */
1479 static void
1481 {
1482  Size needed_length;
1483  SnapBuildOnDisk *ondisk;
1484  char *ondisk_c;
1485  int fd;
1486  char tmppath[MAXPGPATH];
1487  char path[MAXPGPATH];
1488  int ret;
1489  struct stat stat_buf;
1490  Size sz;
1491 
1492  Assert(lsn != InvalidXLogRecPtr);
1494  builder->last_serialized_snapshot <= lsn);
1495 
1496  /*
1497  * no point in serializing if we cannot continue to work immediately after
1498  * restoring the snapshot
1499  */
1500  if (builder->state < SNAPBUILD_CONSISTENT)
1501  return;
1502 
1503  /*
1504  * We identify snapshots by the LSN they are valid for. We don't need to
1505  * include timelines in the name as each LSN maps to exactly one timeline
1506  * unless the user used pg_resetwal or similar. If a user did so, there's
1507  * no hope continuing to decode anyway.
      *
      * NOTE(review): sprintf() is unbounded; the formatted name is short and
      * fixed-shape, but snprintf(path, sizeof(path), ...) would make the bound
      * explicit.
1508  */
1509  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1510  (uint32) (lsn >> 32), (uint32) lsn);
1511 
1512  /*
1513  * first check whether some other backend already has written the snapshot
1514  * for this LSN. It's perfectly fine if there's none, so we accept ENOENT
1515  * as a valid state. Everything else is an unexpected error.
1516  */
1517  ret = stat(path, &stat_buf);
1518 
1519  if (ret != 0 && errno != ENOENT)
1520  ereport(ERROR,
1522  errmsg("could not stat file \"%s\": %m", path)));
1523 
1524  else if (ret == 0)
1525  {
1526  /*
1527  * somebody else has already serialized to this point, don't overwrite
1528  * but remember location, so we don't need to read old data again.
1529  *
1530  * To be sure it has been synced to disk after the rename() from the
1531  * tempfile filename to the real filename, we just repeat the fsync.
1532  * That ought to be cheap because in most scenarios it should already
1533  * be safely on disk.
1534  */
1535  fsync_fname(path, false);
1536  fsync_fname("pg_logical/snapshots", true);
1537 
1538  builder->last_serialized_snapshot = lsn;
1539  goto out;
1540  }
1541 
1542  /*
1543  * there is an obvious race condition here between the time we stat(2) the
1544  * file and us writing the file. But we rename the file into place
1545  * atomically and all files created need to contain the same data anyway,
1546  * so this is perfectly fine, although a bit of a resource waste. Locking
1547  * seems like pointless complication.
1548  */
1549  elog(DEBUG1, "serializing snapshot to %s", path);
1550 
1551  /*
      * to make sure only we will write to this tempfile, include pid
      *
      * NOTE(review): the pid is formatted with %u; the cross-reference index
      * lists MyProcPid as 'int', for which %d would be the matching
      * conversion -- confirm.
      */
1552  sprintf(tmppath, "pg_logical/snapshots/%X-%X.snap.%u.tmp",
1553  (uint32) (lsn >> 32), (uint32) lsn, MyProcPid);
1554 
1555  /*
1556  * Unlink temporary file if it already exists, needs to have been before a
1557  * crash/error since we won't enter this function twice from within a
1558  * single decoding slot/backend and the temporary file contains the pid of
1559  * the current process.
1560  */
1561  if (unlink(tmppath) != 0 && errno != ENOENT)
1562  ereport(ERROR,
1564  errmsg("could not remove file \"%s\": %m", tmppath)));
1565 
      /* fixed-size struct followed by one TransactionId per committed xid */
1566  needed_length = sizeof(SnapBuildOnDisk) +
1567  sizeof(TransactionId) * builder->committed.xcnt;
1568 
1569  ondisk_c = MemoryContextAllocZero(builder->context, needed_length);
1570  ondisk = (SnapBuildOnDisk *) ondisk_c;
1571  ondisk->magic = SNAPBUILD_MAGIC;
1572  ondisk->version = SNAPBUILD_VERSION;
1573  ondisk->length = needed_length;
      /* checksum starts at 'version'; the bytes before it are not covered */
1574  INIT_CRC32C(ondisk->checksum);
1575  COMP_CRC32C(ondisk->checksum,
1576  ((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
      /* ondisk_c now points just past the fixed-size struct */
1578  ondisk_c += sizeof(SnapBuildOnDisk);
1579 
1580  memcpy(&ondisk->builder, builder, sizeof(SnapBuild));
1581  /* NULL-ify memory-only data */
1582  ondisk->builder.context = NULL;
1583  ondisk->builder.snapshot = NULL;
1584  ondisk->builder.reorder = NULL;
1585  ondisk->builder.committed.xip = NULL;
1586 
1587  COMP_CRC32C(ondisk->checksum,
1588  &ondisk->builder,
1589  sizeof(SnapBuild));
1590 
1591  /* there shouldn't be any running xacts */
1592  Assert(builder->was_running.was_xcnt == 0);
1593 
1594  /* copy committed xacts */
1595  sz = sizeof(TransactionId) * builder->committed.xcnt;
1596  memcpy(ondisk_c, builder->committed.xip, sz);
1597  COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
1598  ondisk_c += sz;
1599 
1600  FIN_CRC32C(ondisk->checksum);
1601 
1602  /* we have valid data now, open tempfile and write it there */
1603  fd = OpenTransientFile(tmppath,
1604  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
1605  if (fd < 0)
1606  ereport(ERROR,
1608  errmsg("could not open file \"%s\": %m", tmppath)));
1609 
      /* cleared so that a short write which leaves errno untouched is detectable */
1610  errno = 0;
1612  if ((write(fd, ondisk, needed_length)) != needed_length)
1613  {
1614  int save_errno = errno;
1615 
1616  CloseTransientFile(fd);
1617 
1618  /* if write didn't set errno, assume problem is no disk space */
1619  errno = save_errno ? save_errno : ENOSPC;
1620  ereport(ERROR,
1622  errmsg("could not write to file \"%s\": %m", tmppath)));
1623  }
1625 
1626  /*
1627  * fsync the file before renaming so that even if we crash after this we
1628  * have either a fully valid file or nothing.
1629  *
1630  * It's safe to just ERROR on fsync() here because we'll retry the whole
1631  * operation including the writes.
1632  *
1633  * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has
1634  * some noticeable overhead since it's performed synchronously during
1635  * decoding?
1636  */
1638  if (pg_fsync(fd) != 0)
1639  {
      /* preserve errno across the close so %m reports the fsync failure */
1640  int save_errno = errno;
1641 
1642  CloseTransientFile(fd);
1643  errno = save_errno;
1644  ereport(ERROR,
1646  errmsg("could not fsync file \"%s\": %m", tmppath)));
1647  }
1649 
1650  if (CloseTransientFile(fd) != 0)
1651  ereport(ERROR,
1653  errmsg("could not close file \"%s\": %m", tmppath)));
1654 
1655  fsync_fname("pg_logical/snapshots", true);
1656 
1657  /*
1658  * We may overwrite the work from some other backend, but that's ok, our
1659  * snapshot is valid as well, we'll just have done some superfluous work.
1660  */
1661  if (rename(tmppath, path) != 0)
1662  {
1663  ereport(ERROR,
1665  errmsg("could not rename file \"%s\" to \"%s\": %m",
1666  tmppath, path)));
1667  }
1668 
1669  /* make sure we persist */
1670  fsync_fname(path, false);
1671  fsync_fname("pg_logical/snapshots", true);
1672 
1673  /*
1674  * Now there's no way we can lose the dumped state anymore, remember this
1675  * as a serialization point.
1676  */
1677  builder->last_serialized_snapshot = lsn;
1678 
1679 out:
1681  builder->last_serialized_snapshot);
1682 }
1683 
1684 /*
1685  * Restore a snapshot into 'builder' if previously one has been stored at the
1686  * location indicated by 'lsn'. Returns true if successful, false otherwise.
      *
      * false is returned when the builder is already consistent, when no file
      * exists for 'lsn', or when the stored snapshot is not usable (it is not
      * consistent, or its xmin precedes builder->initial_xmin_horizon). I/O
      * problems, a bad magic/version or a checksum mismatch raise ERROR.
      *
      * The file is read in four pieces -- fixed header, SnapBuild struct,
      * was_running xids, committed xids -- and everything from the 'version'
      * member onwards is folded into the CRC that is compared at the end.
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1688 to 1690); the
      * cross-reference index gives it as
      * SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn). The first line of
      * each multi-line statement that carried a hyperlink is missing as well.
      * NOTE(review): 'readBytes' is an int that is compared against Size
      * values; confirm that the mixed-signedness comparison is intended.
      * NOTE(review): no pfree() of the was_running.was_xip buffer is visible on
      * any path, and the snapshot_not_interesting path frees only
      * committed.xip -- confirm whether that is deliberate.
1687  */
1688 static bool
1690 {
1691  SnapBuildOnDisk ondisk;
1692  int fd;
1693  char path[MAXPGPATH];
1694  Size sz;
1695  int readBytes;
1696  pg_crc32c checksum;
1697 
1698  /* no point in loading a snapshot if we're already there */
1699  if (builder->state == SNAPBUILD_CONSISTENT)
1700  return false;
1701 
      /* same naming scheme as used when serializing: <hi>-<lo>.snap */
1702  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1703  (uint32) (lsn >> 32), (uint32) lsn);
1704 
1705  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
1706 
      /* a missing file simply means nothing was serialized at this LSN */
1707  if (fd < 0 && errno == ENOENT)
1708  return false;
1709  else if (fd < 0)
1710  ereport(ERROR,
1712  errmsg("could not open file \"%s\": %m", path)));
1713 
1714  /* ----
1715  * Make sure the snapshot had been stored safely to disk, that's normally
1716  * cheap.
1717  * Note that we do not need PANIC here, nobody will be able to use the
1718  * slot without fsyncing, and saving it won't succeed without an fsync()
1719  * either...
1720  * ----
1721  */
1722  fsync_fname(path, false);
1723  fsync_fname("pg_logical/snapshots", true);
1724 
1725 
1726  /* read statically sized portion of snapshot */
1728  readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
1730  if (readBytes != SnapBuildOnDiskConstantSize)
1731  {
      /* keep read()'s errno; CloseTransientFile() may overwrite it */
1732  int save_errno = errno;
1733 
1734  CloseTransientFile(fd);
1735 
1736  if (readBytes < 0)
1737  {
1738  errno = save_errno;
1739  ereport(ERROR,
1741  errmsg("could not read file \"%s\": %m", path)));
1742  }
1743  else
1744  ereport(ERROR,
1746  errmsg("could not read file \"%s\": read %d of %zu",
1747  path, readBytes,
1749  }
1750 
1751  if (ondisk.magic != SNAPBUILD_MAGIC)
1752  ereport(ERROR,
1754  errmsg("snapbuild state file \"%s\" has wrong magic number: %u instead of %u",
1755  path, ondisk.magic, SNAPBUILD_MAGIC)));
1756 
1757  if (ondisk.version != SNAPBUILD_VERSION)
1758  ereport(ERROR,
1760  errmsg("snapbuild state file \"%s\" has unsupported version: %u instead of %u",
1761  path, ondisk.version, SNAPBUILD_VERSION)));
1762 
      /* checksum coverage begins at 'version', mirroring the writer */
1763  INIT_CRC32C(checksum);
1764  COMP_CRC32C(checksum,
1765  ((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize,
1767 
1768  /* read SnapBuild */
1770  readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
1772  if (readBytes != sizeof(SnapBuild))
1773  {
1774  int save_errno = errno;
1775 
1776  CloseTransientFile(fd);
1777 
1778  if (readBytes < 0)
1779  {
1780  errno = save_errno;
1781  ereport(ERROR,
1783  errmsg("could not read file \"%s\": %m", path)));
1784  }
1785  else
1786  ereport(ERROR,
1788  errmsg("could not read file \"%s\": read %d of %zu",
1789  path, readBytes, sizeof(SnapBuild))));
1790  }
1791  COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
1792 
1793  /* restore running xacts (dead, but kept for backward compat) */
1794  sz = sizeof(TransactionId) * ondisk.builder.was_running.was_xcnt_space;
1795  ondisk.builder.was_running.was_xip =
1796  MemoryContextAllocZero(builder->context, sz);
1798  readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
1800  if (readBytes != sz)
1801  {
1802  int save_errno = errno;
1803 
1804  CloseTransientFile(fd);
1805 
1806  if (readBytes < 0)
1807  {
1808  errno = save_errno;
1809  ereport(ERROR,
1811  errmsg("could not read file \"%s\": %m", path)));
1812  }
1813  else
1814  ereport(ERROR,
1816  errmsg("could not read file \"%s\": read %d of %zu",
1817  path, readBytes, sz)));
1818  }
1819  COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
1820 
1821  /* restore committed xacts information */
1822  sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
1823  ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
1825  readBytes = read(fd, ondisk.builder.committed.xip, sz);
1827  if (readBytes != sz)
1828  {
1829  int save_errno = errno;
1830 
1831  CloseTransientFile(fd);
1832 
1833  if (readBytes < 0)
1834  {
1835  errno = save_errno;
1836  ereport(ERROR,
1838  errmsg("could not read file \"%s\": %m", path)));
1839  }
1840  else
1841  ereport(ERROR,
1843  errmsg("could not read file \"%s\": read %d of %zu",
1844  path, readBytes, sz)));
1845  }
1846  COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
1847 
1848  if (CloseTransientFile(fd) != 0)
1849  ereport(ERROR,
1851  errmsg("could not close file \"%s\": %m", path)));
1852 
1853  FIN_CRC32C(checksum);
1854 
1855  /* verify checksum of what we've read */
1856  if (!EQ_CRC32C(checksum, ondisk.checksum))
1857  ereport(ERROR,
1859  errmsg("checksum mismatch for snapbuild state file \"%s\": is %u, should be %u",
1860  path, checksum, ondisk.checksum)));
1861 
1862  /*
1863  * ok, we now have a sensible snapshot here, figure out if it has more
1864  * information than we have.
1865  */
1866 
1867  /*
1868  * We are only interested in consistent snapshots for now, comparing
1869  * whether one incomplete snapshot is more "advanced" seems to be
1870  * unnecessarily complex.
1871  */
1872  if (ondisk.builder.state < SNAPBUILD_CONSISTENT)
1873  goto snapshot_not_interesting;
1874 
1875  /*
1876  * Don't use a snapshot that requires an xmin that we cannot guarantee to
1877  * be available.
1878  */
1879  if (TransactionIdPrecedes(ondisk.builder.xmin, builder->initial_xmin_horizon))
1880  goto snapshot_not_interesting;
1881 
1882 
1883  /* ok, we think the snapshot is sensible, copy over everything important */
1884  builder->xmin = ondisk.builder.xmin;
1885  builder->xmax = ondisk.builder.xmax;
1886  builder->state = ondisk.builder.state;
1887 
1888  builder->committed.xcnt = ondisk.builder.committed.xcnt;
1889  /* We only allocated/stored xcnt, not xcnt_space xids ! */
1890  /* don't overwrite preallocated xip, if we don't have anything here */
1891  if (builder->committed.xcnt > 0)
1892  {
1893  pfree(builder->committed.xip);
1894  builder->committed.xcnt_space = ondisk.builder.committed.xcnt;
1895  builder->committed.xip = ondisk.builder.committed.xip;
1896  }
      /* drop our reference: the array is either owned by 'builder' now or unused */
1897  ondisk.builder.committed.xip = NULL;
1898 
1899  /* our snapshot is not interesting anymore, build a new one */
1900  if (builder->snapshot != NULL)
1901  {
1903  }
1904  builder->snapshot = SnapBuildBuildSnapshot(builder);
1906 
1907  ReorderBufferSetRestartPoint(builder->reorder, lsn);
1908 
1909  Assert(builder->state == SNAPBUILD_CONSISTENT);
1910 
1911  ereport(LOG,
1912  (errmsg("logical decoding found consistent point at %X/%X",
1913  (uint32) (lsn >> 32), (uint32) lsn),
1914  errdetail("Logical decoding will begin using saved snapshot.")));
1915  return true;
1916 
      /* reached when the stored snapshot was read fine but cannot be used */
1917 snapshot_not_interesting:
1918  if (ondisk.builder.committed.xip != NULL)
1919  pfree(ondisk.builder.committed.xip);
1920  return false;
1921 }
1922 
1923 /*
1924  * Remove all serialized snapshots that are not required anymore because no
1925  * slot can need them. This doesn't actually have to run during a checkpoint,
1926  * but it's a convenient point to schedule this.
1927  *
1928  * NB: We run this during checkpoints even if logical decoding is disabled so
1929  * we cleanup old slots at some point after it got disabled.
      *
      * Every entry of pg_logical/snapshots whose name parses as "<hi>-<lo>.snap"
      * and whose LSN lies below the cutoff (or any such entry when the cutoff
      * is invalid) is unlinked; failures to unlink are merely logged.
      *
      * NOTE(review): the line with the function name and parameters is absent
      * from this listing (numbering jumps from 1931 to 1933), as is the
      * statement that initially assigns 'cutoff' (1948) and the first argument
      * line of the final ereport (2005).
1930  */
1931 void
1933 {
1934  XLogRecPtr cutoff;
1935  XLogRecPtr redo;
1936  DIR *snap_dir;
1937  struct dirent *snap_de;
      /* the extra 21 bytes match the length of the "pg_logical/snapshots/" prefix */
1938  char path[MAXPGPATH + 21];
1939 
1940  /*
1941  * We start off with a minimum of the last redo pointer. No new
1942  * replication slot will start before that, so that's a safe upper bound
1943  * for removal.
1944  */
1945  redo = GetRedoRecPtr();
1946 
1947  /* now check for the restart ptrs from existing slots */
1949 
1950  /* don't start earlier than the restart lsn */
1951  if (redo < cutoff)
1952  cutoff = redo;
1953 
1954  snap_dir = AllocateDir("pg_logical/snapshots");
1955  while ((snap_de = ReadDir(snap_dir, "pg_logical/snapshots")) != NULL)
1956  {
1957  uint32 hi;
1958  uint32 lo;
1959  XLogRecPtr lsn;
1960  struct stat statbuf;
1961 
1962  if (strcmp(snap_de->d_name, ".") == 0 ||
1963  strcmp(snap_de->d_name, "..") == 0)
1964  continue;
1965 
1966  snprintf(path, sizeof(path), "pg_logical/snapshots/%s", snap_de->d_name);
1967 
      /* skip anything lstat() identifies as not being a regular file */
1968  if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
1969  {
1970  elog(DEBUG1, "only regular files expected: %s", path);
1971  continue;
1972  }
1973 
1974  /*
1975  * temporary filenames from SnapBuildSerialize() include the LSN and
1976  * everything but are postfixed by .$pid.tmp. We can just remove them
1977  * the same as other files because there can be none that are
1978  * currently being written that are older than cutoff.
1979  *
1980  * We just log a message if a file doesn't fit the pattern, it's
1981  * probably some editor's lock/state file or similar...
      *
      * sscanf() reports 2 as soon as both hex fields were converted, so
      * names carrying a trailing ".$pid.tmp" are accepted here as well.
1982  */
1983  if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2)
1984  {
1985  ereport(LOG,
1986  (errmsg("could not parse file name \"%s\"", path)));
1987  continue;
1988  }
1989 
      /* reassemble the 64-bit LSN from the two 32-bit halves in the name */
1990  lsn = ((uint64) hi) << 32 | lo;
1991 
1992  /* check whether we still need it */
1993  if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
1994  {
1995  elog(DEBUG1, "removing snapbuild snapshot %s", path);
1996 
1997  /*
1998  * It's not particularly harmful, though strange, if we can't
1999  * remove the file here. Don't prevent the checkpoint from
2000  * completing, that'd be a cure worse than the disease.
2001  */
2002  if (unlink(path) < 0)
2003  {
2004  ereport(LOG,
2006  errmsg("could not remove file \"%s\": %m",
2007  path)));
2008  continue;
2009  }
2010  }
2011  }
2012  FreeDir(snap_dir);
2013 }
#define TransactionIdAdvance(dest)
Definition: transam.h:75
static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1689
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
uint32 CommandId
Definition: c.h:528
void AbortCurrentTransaction(void)
Definition: xact.c:3162
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
TransactionId was_xmin
Definition: snapbuild.c:203
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2177
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define AllocSetContextCreate
Definition: memutils.h:170
struct SnapBuild::@24 committed
#define DEBUG1
Definition: elog.h:25
int MyProcPid
Definition: globals.c:40
static void SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:853
#define SNAPBUILD_VERSION
Definition: snapbuild.c:1458
static void test(void)
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
uint32 TransactionId
Definition: c.h:514
bool copied
Definition: snapshot.h:185
static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
Definition: snapbuild.c:1384
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition: snapbuild.c:404
#define SNAPBUILD_MAGIC
Definition: snapbuild.c:1457
void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, int nsubxacts, TransactionId *subxacts)
Definition: snapbuild.c:923
#define DEBUG3
Definition: elog.h:23
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition: snapbuild.c:395
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:853
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:932
#define write(a, b, c)
Definition: win32.h:14
TransactionId xmin
Definition: proc.h:228
CommandId combocid
Definition: heapam_xlog.h:364
pg_crc32c checksum
Definition: snapbuild.c:1437
Snapshot snapshot
Definition: snapbuild.c:179
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
uint32 pg_crc32c
Definition: pg_crc32c.h:38
void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
Definition: logical.c:932
ResourceOwner CurrentResourceOwner
Definition: resowner.c:142
XLogRecPtr current_restart_decoding_lsn
#define XACT_REPEATABLE_READ
Definition: xact.h:38
bool building_full_snapshot
Definition: snapbuild.c:174
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:617
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
int errcode(int sqlerrcode)
Definition: elog.c:608
CommandId cmax
Definition: heapam_xlog.h:363
bool IsTransactionOrTransactionBlock(void)
Definition: xact.c:4653
size_t xcnt_space
Definition: snapbuild.c:221
ItemPointerData target_tid
Definition: heapam_xlog.h:370
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1191
bool suboverflowed
Definition: snapshot.h:182
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileNode node, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1192
size_t xcnt
Definition: snapbuild.c:218
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:7935
Definition: dirent.h:9
uint32 regd_count
Definition: snapshot.h:199
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1222
static ResourceOwner SavedResourceOwnerDuringExport
Definition: snapbuild.c:252
PGXACT * MyPgXact
Definition: proc.c:68
#define FirstCommandId
Definition: c.h:530
int errdetail_internal(const char *fmt,...)
Definition: elog.c:982
bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
Definition: snapbuild.c:715
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
#define sprintf
Definition: port.h:194
const char * SnapBuildExportSnapshot(SnapBuild *builder)
Definition: snapbuild.c:631
SnapBuildState state
Definition: snapbuild.c:150
static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1480
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:1056
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:901
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
Definition: lmgr.h:26
struct SnapBuild::@23 was_running
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2292
TransactionId * was_xip
Definition: snapbuild.c:208
bool FirstSnapshotSet
Definition: snapmgr.c:205
size_t was_xcnt
Definition: snapbuild.c:206
#define MAXPGPATH
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:807
static bool ExportInProgress
Definition: snapbuild.c:253
#define DEBUG2
Definition: elog.h:24
SnapBuild builder
Definition: snapbuild.c:1447
ReorderBuffer * reorder
Definition: snapbuild.c:189
TransactionId initial_xmin_horizon
Definition: snapbuild.c:171
TransactionId * xip
Definition: snapbuild.c:244
void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn, xl_heap_new_cid *xlrec)
Definition: snapbuild.c:765
static void SnapBuildFreeSnapshot(Snapshot snap)
Definition: snapbuild.c:370
int errdetail(const char *fmt,...)
Definition: elog.c:955
static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder)
Definition: snapbuild.c:460
int errcode_for_file_access(void)
Definition: elog.c:631
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:791
struct SnapshotData SnapshotData
bool includes_all_transactions
Definition: snapbuild.c:228
#define InvalidTransactionId
Definition: transam.h:31
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1932
SnapshotType snapshot_type
Definition: snapshot.h:144
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
unsigned int uint32
Definition: c.h:359
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2503
void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1090
TransactionId xmax
Definition: snapshot.h:158
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1342
TransactionId xmin
Definition: snapshot.h:157
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
SnapBuildState
Definition: snapbuild.h:18
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define ereport(elevel, rest)
Definition: elog.h:141
XLogRecPtr last_serialized_snapshot
Definition: snapbuild.c:184
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:168
#define S_ISREG(m)
Definition: win32_port.h:299
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
int CloseTransientFile(int fd)
Definition: fd.c:2469
TransactionId xmax
Definition: snapbuild.c:159
#define stat(a, b)
Definition: win32_port.h:255
RelFileNode target_node
Definition: heapam_xlog.h:369
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
void * palloc0(Size size)
Definition: mcxt.c:980
#define InvalidCommandId
Definition: c.h:531
dlist_head toplevel_by_lsn
TransactionId xid
dlist_node * cur
Definition: ilist.h:161
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
CommandId curcid
Definition: snapshot.h:187
#define SnapBuildOnDiskConstantSize
Definition: snapbuild.c:1452
TransactionId was_xmax
Definition: snapbuild.c:204
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1452
bool XactReadOnly
Definition: xact.c:77
void FreeSnapshotBuilder(SnapBuild *builder)
Definition: snapbuild.c:351
int errmsg_internal(const char *fmt,...)
Definition: elog.c:909
#define Max(x, y)
Definition: c.h:905
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:624
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2569
void LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
Definition: logical.c:875
void StartTransactionCommand(void)
Definition: xact.c:2797
#define SnapBuildOnDiskNotChecksummedSize
Definition: snapbuild.c:1454
bool takenDuringRecovery
Definition: snapshot.h:184
#define NormalTransactionIdFollows(id1, id2)
Definition: transam.h:103
size_t Size
Definition: c.h:467
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1318
int XactIsoLevel
Definition: xact.c:74
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
bool IsTransactionState(void)
Definition: xact.c:355
MemoryContext context
Definition: snapbuild.c:153
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:8208
TransactionId nextXid
Definition: standbydefs.h:52
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:98
static void SnapBuildStartNextPhaseAt(SnapBuild *builder, TransactionId at)
Definition: snapbuild.c:294
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition: snapbuild.c:428
#define lstat(path, sb)
Definition: win32_port.h:244
Snapshot SnapBuildInitialSnapshot(SnapBuild *builder)
Definition: snapbuild.c:539
uint32 xcnt
Definition: snapshot.h:169
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
TransactionId xmin
Definition: snapbuild.c:156
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
#define elog(elevel,...)
Definition: elog.h:228
void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1467
XLogRecPtr restart_decoding_lsn
TransactionId oldestRunningXid
Definition: standbydefs.h:53
CommandId cmin
Definition: heapam_xlog.h:362
int pg_fsync(int fd)
Definition: fd.c:330
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
char d_name[MAX_PATH]
Definition: dirent.h:14
struct SnapBuildOnDisk SnapBuildOnDisk
#define qsort(a, b, c, d)
Definition: port.h:488
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
static void SnapBuildSnapIncRefcount(Snapshot snap)
Definition: snapbuild.c:416
#define snprintf
Definition: port.h:192
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
SnapBuild * AllocateSnapshotBuilder(ReorderBuffer *reorder, TransactionId xmin_horizon, XLogRecPtr start_lsn, bool need_full_snapshot)
Definition: snapbuild.c:310
static TransactionId SnapBuildNextPhaseAt(SnapBuild *builder)
Definition: snapbuild.c:280
XLogRecPtr start_decoding_at
Definition: snapbuild.c:165
static void SnapBuildPurgeCommittedTxn(SnapBuild *builder)
Definition: snapbuild.c:882
#define read(a, b, c)
Definition: win32.h:13
int FreeDir(DIR *dir)
Definition: fd.c:2621
size_t was_xcnt_space
Definition: snapbuild.c:207
void SnapBuildClearExportedSnapshot(void)
Definition: snapbuild.c:692
TransactionId * subxip
Definition: snapshot.h:180
uint32 active_count
Definition: snapshot.h:198
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:138
int32 subxcnt
Definition: snapshot.h:181
TransactionId top_xid
Definition: heapam_xlog.h:361
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
Snapshot SnapBuildGetOrBuildSnapshot(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:671