PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
snapbuild.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * snapbuild.c
4  *
5  * Infrastructure for building historic catalog snapshots based on contents
6  * of the WAL, for the purpose of decoding heapam.c style values in the
7  * WAL.
8  *
9  * NOTES:
10  *
11  * We build snapshots which can *only* be used to read catalog contents and we
12  * do so by reading and interpreting the WAL stream. The aim is to build a
13  * snapshot that behaves the same as a freshly taken MVCC snapshot would have
14  * at the time the XLogRecord was generated.
15  *
16  * To build the snapshots we reuse the infrastructure built for Hot
17  * Standby. The in-memory snapshots we build look different than HS' because
18  * we have different needs. To successfully decode data from the WAL we only
19  * need to access catalog tables and (sys|rel|cat)cache, not the actual user
20  * tables since the data we decode is wholly contained in the WAL
21  * records. Also, our snapshots need to be different in comparison to normal
22  * MVCC ones because in contrast to those we cannot fully rely on the clog and
23  * pg_subtrans for information about committed transactions because they might
24  * commit in the future from the POV of the WAL entry we're currently
25  * decoding. This definition has the advantage that we only need to prevent
26  * removal of catalog rows, while normal table's rows can still be
27  * removed. This is achieved by using the replication slot mechanism.
28  *
29  * As the percentage of transactions modifying the catalog normally is fairly
30  * small in comparison to ones only manipulating user data, we keep track of
31  * the committed catalog modifying ones inside [xmin, xmax) instead of keeping
32  * track of all running transactions like it's done in a normal snapshot. Note
33  * that we're generally only looking at transactions that have acquired an
34  * xid. That is we keep a list of transactions between snapshot->(xmin, xmax)
35  * that we consider committed, everything else is considered aborted/in
36  * progress. That also allows us not to care about subtransactions before they
37  * have committed which means this module, in contrast to HS, doesn't have to
38  * care about suboverflowed subtransactions and similar.
39  *
40  * One complexity of doing this is that to e.g. handle mixed DDL/DML
41  * transactions we need Snapshots that see intermediate versions of the
42  * catalog in a transaction. During normal operation this is achieved by using
43  * CommandIds/cmin/cmax. The problem with that however is that for space
44  * efficiency reasons only one value of that is stored
45  * (c.f. combocid.c). Since ComboCids are only available in memory we log
46  * additional information which allows us to get the original (cmin, cmax)
47  * pair during visibility checks. Check the reorderbuffer.c's comment above
48  * ResolveCminCmaxDuringDecoding() for details.
49  *
50  * To facilitate all this we need our own visibility routine, as the normal
51  * ones are optimized for different usecases.
52  *
53  * To replace the normal catalog snapshots with decoding ones use the
54  * SetupHistoricSnapshot() and TeardownHistoricSnapshot() functions.
55  *
56  *
57  *
58  * The snapbuild machinery is starting up in several stages, as illustrated
59  * by the following graph:
60  * +-------------------------+
61  * +----|SNAPBUILD_START |-------------+
62  * | +-------------------------+ |
63  * | | |
64  * | | |
65  * | running_xacts with running xacts |
66  * | | |
67  * | | |
68  * | v |
69  * | +-------------------------+ v
70  * | |SNAPBUILD_FULL_SNAPSHOT |------------>|
71  * | +-------------------------+ |
72  * running_xacts | saved snapshot
73  * with zero xacts | at running_xacts's lsn
74  * | | |
75  * | all running toplevel TXNs finished |
76  * | | |
77  * | v |
78  * | +-------------------------+ |
79  * +--->|SNAPBUILD_CONSISTENT |<------------+
80  * +-------------------------+
81  *
82  * Initially the machinery is in the START stage. When an xl_running_xacts
83  * record is read that is sufficiently new (above the safe xmin horizon),
84  * there's a state transition. If there were no running xacts when the
85  * running_xacts record was generated, we'll directly go into CONSISTENT
86  * state, otherwise we'll switch to the FULL_SNAPSHOT state. Having a full
87  * snapshot means that all transactions that start henceforth can be decoded
88  * in their entirety, but transactions that started previously can't. In
89  * FULL_SNAPSHOT we'll switch into CONSISTENT once all those previously
90  * running transactions have committed or aborted.
91  *
92  * Only transactions that commit after CONSISTENT state has been reached will
93  * be replayed, even though they might have started while still in
94  * FULL_SNAPSHOT. That ensures that we'll reach a point where no previous
95  * changes have been exported, but all the following ones will be. That point
96  * is a convenient point to initialize replication from, which is why we
97  * export a snapshot at that point, which *can* be used to read normal data.
98  *
99  * Copyright (c) 2012-2017, PostgreSQL Global Development Group
100  *
101  * IDENTIFICATION
102  * src/backend/replication/snapbuild.c
103  *
104  *-------------------------------------------------------------------------
105  */
106 
107 #include "postgres.h"
108 
109 #include <sys/stat.h>
110 #include <unistd.h>
111 
112 #include "miscadmin.h"
113 
114 #include "access/heapam_xlog.h"
115 #include "access/transam.h"
116 #include "access/xact.h"
117 
118 #include "replication/logical.h"
120 #include "replication/snapbuild.h"
121 
122 #include "utils/builtins.h"
123 #include "utils/memutils.h"
124 #include "utils/snapshot.h"
125 #include "utils/snapmgr.h"
126 #include "utils/tqual.h"
127 
128 #include "storage/block.h" /* debugging output */
129 #include "storage/fd.h"
130 #include "storage/lmgr.h"
131 #include "storage/proc.h"
132 #include "storage/procarray.h"
133 #include "storage/standby.h"
134 
135 /*
136  * This struct contains the current state of the snapshot building
137  * machinery. Besides a forward declaration in the header, it is not exposed
138  * to the public, so we can easily change its contents.
 *
 * NOTE(review): this listing has lost most member declarations -- the
 * embedded line numbers skip 143, 146, 149, 152, 158, 164, 169, 174, 179,
 * 195-196, 220 and 236, each directly after the comment describing a member.
 * Code later in this file accesses builder->state, ->context, ->xmin, ->xmax,
 * ->start_decoding_at, ->initial_xmin_horizon, ->snapshot, ->reorder,
 * ->running.xmin, ->committed.includes_all_transactions and ->committed.xip,
 * so those are presumably the dropped members; restore them (and their
 * types) from the upstream file before compiling.
139  */
140 struct SnapBuild
141 {
142  /* how far are we along building our first full snapshot */
     /* NOTE(review): declaration missing here (listing line 143); presumably `state` */
144 
145  /* private memory context used to allocate memory for this module. */
     /* NOTE(review): declaration missing here (listing line 146); presumably `context` */
147 
148  /* all transactions < than this have committed/aborted */
     /* NOTE(review): declaration missing here (listing line 149); presumably `xmin` */
150 
151  /* all transactions >= than this are uncommitted */
     /* NOTE(review): declaration missing here (listing line 152); presumably `xmax` */
153 
154  /*
155  * Don't replay commits from an LSN < this LSN. This can be set externally
156  * but it will also be advanced (never retreat) from within snapbuild.c.
157  */
     /* NOTE(review): declaration missing here (listing line 158); presumably `start_decoding_at` */
159 
160  /*
161  * Don't start decoding WAL until the "xl_running_xacts" information
162  * indicates there are no running xids with an xid smaller than this.
163  */
     /* NOTE(review): declaration missing here (listing line 164); presumably `initial_xmin_horizon` */
165 
166  /*
167  * Snapshot that's valid to see the catalog state seen at this moment.
168  */
     /* NOTE(review): declaration missing here (listing line 169); presumably `snapshot` */
170 
171  /*
172  * LSN of the last location we are sure a snapshot has been serialized to.
173  */
     /* NOTE(review): declaration missing here (listing line 174); member name not visible in this chunk -- confirm upstream */
175 
176  /*
177  * The reorderbuffer we need to update with usable snapshots et al.
178  */
     /* NOTE(review): declaration missing here (listing line 179); presumably `reorder` */
180 
181  /*
182  * Information about initially running transactions
183  *
184  * When we start building a snapshot there already may be transactions in
185  * progress. Those are stored in running.xip. We don't have enough
186  * information about those to decode their contents, so until they are
187  * finished (xcnt=0) we cannot switch to a CONSISTENT state.
188  */
189  struct
190  {
191  /*
192  * As long as running.xcnt all XIDs < running.xmin and > running.xmax
193  * have to be checked whether they still are running.
194  */
     /* NOTE(review): two declarations missing here (listing lines 195-196); presumably `xmin` and `xmax` */
197 
198  size_t xcnt; /* number of used xip entries */
199  size_t xcnt_space; /* allocated size of xip */
200  TransactionId *xip; /* running xacts array, xidComparator-sorted */
201  } running;
202 
203  /*
204  * Array of transactions which could have catalog changes that committed
205  * between xmin and xmax.
206  */
207  struct
208  {
209  /* number of committed transactions */
210  size_t xcnt;
211 
212  /* available space for committed transactions */
213  size_t xcnt_space;
214 
215  /*
216  * Until we reach a CONSISTENT state, we record commits of all
217  * transactions, not just the catalog changing ones. Record when that
218  * changes so we know we cannot export a snapshot safely anymore.
219  */
     /* NOTE(review): declaration missing here (listing line 220); presumably `includes_all_transactions` */
221 
222  /*
223  * Array of committed transactions that have modified the catalog.
224  *
225  * As this array is frequently modified we do *not* keep it in
226  * xidComparator order. Instead we sort the array when building &
227  * distributing a snapshot.
228  *
229  * TODO: It's unclear whether that reasoning has much merit. Every
230  * time we add something here after becoming consistent will also
231  * require distributing a snapshot. Storing them sorted would
232  * potentially also make it easier to purge (but more complicated wrt
233  * wraparound?). Should be improved if sorting while building the
234  * snapshot shows up in profiles.
235  */
     /* NOTE(review): declaration missing here (listing line 236); presumably `xip` */
237  } committed;
238 };
239 
240 /*
241  * Starting a transaction -- which we need to do while exporting a snapshot --
242  * removes knowledge about the previously used resowner, so we save it here.
 *
 * NOTE(review): listing line 244 is missing right below this comment. The
 * variable it describes (SavedResourceOwnerDuringExport, which is assigned
 * and tested further down in this file) is therefore not declared in the
 * visible text -- restore from upstream.
243  */
/* Set while an exported snapshot's transaction is open; cleared again when the export is reset. */
245 static bool ExportInProgress = false;
246 
247 /* transaction state manipulation functions */
248 static void SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid);
249 
250 /* ->running manipulation */
251 static bool SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid);
252 
253 /* ->committed manipulation */
254 static void SnapBuildPurgeCommittedTxn(SnapBuild *builder);
255 
256 /* snapshot building/manipulation/distribution functions */
/* NOTE(review): listing line 257 is missing; presumably the prototype of SnapBuildBuildSnapshot(), which is called below -- confirm upstream */
258 
259 static void SnapBuildFreeSnapshot(Snapshot snap);
260 
261 static void SnapBuildSnapIncRefcount(Snapshot snap);
262 
/* NOTE(review): listing line 263 is missing; another prototype was presumably dropped here -- confirm upstream */
264 
265 /* xlog reading helper functions for SnapBuildProcessRecord */
266 static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
267 
268 /* serialization functions */
269 static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
270 static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn);
271 
272 
273 /*
274  * Allocate a new snapshot builder.
275  *
276  * xmin_horizon is the xid >= which we can be sure no catalog rows have been
277  * removed, start_lsn is the LSN >= we want to replay commits.
 *
 * The builder and its committed.xip array are allocated inside a dedicated
 * memory context that is remembered in builder->context. The builder starts
 * in SNAPBUILD_START with committed.includes_all_transactions set.
 *
 * NOTE(review): listing line 280 (function name and first parameter) is
 * missing; the body uses a `reorder` variable that no visible parameter
 * declares. Listing lines 289 and 291 (start and end of the call that creates
 * `context`) are missing as well. Restore all three from upstream.
278  */
279 SnapBuild *
281  TransactionId xmin_horizon,
282  XLogRecPtr start_lsn)
283 {
284  MemoryContext context;
285  MemoryContext oldcontext;
286  SnapBuild *builder;
287 
288  /* allocate memory in own context, to have better accountability */
     /* NOTE(review): only the name argument of the context-creation call survives in this listing */
290  "snapshot builder context",
     /* everything palloc'd until the switch back below lands in `context` */
292  oldcontext = MemoryContextSwitchTo(context);
293 
294  builder = palloc0(sizeof(SnapBuild));
295 
296  builder->state = SNAPBUILD_START;
297  builder->context = context;
298  builder->reorder = reorder;
299  /* Other struct members initialized by zeroing via palloc0 above */
300 
301  builder->committed.xcnt = 0;
302  builder->committed.xcnt_space = 128; /* arbitrary number */
303  builder->committed.xip =
304  palloc0(builder->committed.xcnt_space * sizeof(TransactionId));
305  builder->committed.includes_all_transactions = true;
306 
307  builder->initial_xmin_horizon = xmin_horizon;
308  builder->start_decoding_at = start_lsn;
309 
     /* restore the caller's memory context before returning */
310  MemoryContextSwitchTo(oldcontext);
311 
312  return builder;
313 }
314 
315 /*
316  * Free a snapshot builder.
 *
 * Clears builder->snapshot if one is set, then deletes the builder's private
 * memory context.
 *
 * NOTE(review): listing line 319 (function name and parameter list) and
 * listing line 326 (the statement that releases builder->snapshot before the
 * pointer is cleared) are missing -- restore from upstream.
317  */
318 void
320 {
     /* grab the context first; `builder` itself was allocated inside it */
321  MemoryContext context = builder->context;
322 
323  /* free snapshot explicitly, that contains some error checking */
324  if (builder->snapshot != NULL)
325  {
     /* NOTE(review): release call missing here (listing line 326) */
327  builder->snapshot = NULL;
328  }
329 
330  /* other resources are deallocated via memory context reset */
331  MemoryContextDelete(context);
332 }
333 
334 /*
335  * Free an unreferenced snapshot that has previously been built by us.
 *
 * Raises ERROR (in all builds) if the snapshot is marked copied or still has
 * a non-zero active_count; the remaining sanity checks are Asserts only.
 *
 * NOTE(review): listing line 338 (function name and parameter list) and
 * listing line 341 (the check announced by the "external snapshot" comment)
 * are missing -- restore from upstream.
336  */
337 static void
339 {
340  /* make sure we don't get passed an external snapshot */
     /* NOTE(review): the corresponding check is missing here (listing line 341) */
342 
343  /* make sure nobody modified our snapshot */
344  Assert(snap->curcid == FirstCommandId);
345  Assert(!snap->suboverflowed);
346  Assert(!snap->takenDuringRecovery);
347  Assert(snap->regd_count == 0);
348 
349  /* slightly more likely, so it's checked even without c-asserts */
350  if (snap->copied)
351  elog(ERROR, "cannot free a copied snapshot");
352 
353  if (snap->active_count)
354  elog(ERROR, "cannot free an active snapshot");
355 
     /* the snapshot is one single allocation, so one pfree releases all of it */
356  pfree(snap);
357 }
358 
359 /*
360  * In which state of snapshot building are we?
 *
 * Simple accessor: returns builder->state unchanged.
 *
 * NOTE(review): listing lines 362-363 (return type, function name and
 * parameter list) are missing -- restore from upstream.
361  */
364 {
365  return builder->state;
366 }
367 
368 /*
369  * Should the contents of transaction ending at 'ptr' be decoded?
 *
 * Returns true when 'ptr' lies before builder->start_decoding_at. The struct
 * comment for start_decoding_at says commits below that LSN must not be
 * replayed, so a true result appears to mean "skip this transaction" rather
 * than "decode it" -- NOTE(review): confirm the intended polarity against the
 * callers and the function name.
 *
 * NOTE(review): listing line 372 (function name and parameter list) is
 * missing -- restore from upstream.
370  */
371 bool
373 {
374  return ptr < builder->start_decoding_at;
375 }
376 
377 /*
378  * Increase refcount of a snapshot.
379  *
380  * This is used when handing out a snapshot to some external resource or when
381  * adding a Snapshot as builder->snapshot.
 *
 * The reference count is kept in snap->active_count.
 *
 * NOTE(review): listing line 384 (function name and parameter list) is
 * missing -- restore from upstream.
382  */
383 static void
385 {
386  snap->active_count++;
387 }
388 
389 /*
390  * Decrease refcount of a snapshot and free if the refcount reaches zero.
391  *
392  * Externally visible, so that external resources that have been handed an
393  * IncRef'ed Snapshot can adjust its refcount easily.
 *
 * Raises ERROR if the snapshot is marked copied. When active_count drops to
 * zero the snapshot is released through SnapBuildFreeSnapshot().
 *
 * NOTE(review): listing line 396 (function name and parameter list) and
 * listing line 399 (the check announced by the "external snapshot" comment)
 * are missing -- restore from upstream.
394  */
395 void
397 {
398  /* make sure we don't get passed an external snapshot */
     /* NOTE(review): the corresponding check is missing here (listing line 399) */
400 
401  /* make sure nobody modified our snapshot */
402  Assert(snap->curcid == FirstCommandId);
403  Assert(!snap->suboverflowed);
404  Assert(!snap->takenDuringRecovery);
405 
406  Assert(snap->regd_count == 0);
407 
     /* dropping a reference that was never taken would be a caller bug */
408  Assert(snap->active_count > 0);
409 
410  /* slightly more likely, so it's checked even without casserts */
411  if (snap->copied)
412  elog(ERROR, "cannot free a copied snapshot");
413 
414  snap->active_count--;
415  if (snap->active_count == 0)
416  SnapBuildFreeSnapshot(snap);
417 }
418 
419 /*
420  * Build a new snapshot, based on currently committed catalog-modifying
421  * transactions.
422  *
423  * In-progress transactions with catalog access are *not* allowed to modify
424  * these snapshots; they have to copy them and fill in appropriate ->curcid
425  * and ->subxip/subxcnt values.
 *
 * The result is one zeroed allocation in builder->context: the SnapshotData
 * header followed directly by the xip array. It is returned with
 * active_count == 0, so the caller has to take a reference.
 *
 * NOTE(review): listing line 428 (function name and parameter list) and
 * listing line 441 (a statement right after the allocation) are missing --
 * restore from upstream. Callers in this file pass (builder, xid).
426  */
427 static Snapshot
429 {
430  Snapshot snapshot;
431  Size ssize;
432 
433  Assert(builder->state >= SNAPBUILD_FULL_SNAPSHOT);
434 
     /* header + one slot per committed xid + one spare slot for a toplevel xid */
435  ssize = sizeof(SnapshotData)
436  + sizeof(TransactionId) * builder->committed.xcnt
437  + sizeof(TransactionId) * 1 /* toplevel xid */ ;
438 
439  snapshot = MemoryContextAllocZero(builder->context, ssize);
440 
     /* NOTE(review): statement missing here (listing line 441) */
442 
443  /*
444  * We misuse the original meaning of SnapshotData's xip and subxip fields
445  * to make them more fitting for our needs.
446  *
447  * In the 'xip' array we store transactions that have to be treated as
448  * committed. Since we will only ever look at tuples from transactions
449  * that have modified the catalog it's more efficient to store those few
450  * that exist between xmin and xmax (frequently there are none).
451  *
452  * Snapshots that are used in transactions that have modified the catalog
453  * also use the 'subxip' array to store their toplevel xid and all the
454  * subtransaction xids so we can recognize when we need to treat rows as
455  * visible that are not in xip but still need to be visible. Subxip only
456  * gets filled when the transaction is copied into the context of a
457  * catalog modifying transaction since we otherwise share a snapshot
458  * between transactions. As long as a txn hasn't modified the catalog it
459  * doesn't need to treat any uncommitted rows as visible, so there is no
460  * need for those xids.
461  *
462  * Both arrays are qsort'ed so that we can use bsearch() on them.
463  */
464  Assert(TransactionIdIsNormal(builder->xmin));
465  Assert(TransactionIdIsNormal(builder->xmax));
466 
467  snapshot->xmin = builder->xmin;
468  snapshot->xmax = builder->xmax;
469 
470  /* store all transactions to be treated as committed by this snapshot */
     /* xip points just past the SnapshotData header, inside the same allocation */
471  snapshot->xip =
472  (TransactionId *) ((char *) snapshot + sizeof(SnapshotData));
473  snapshot->xcnt = builder->committed.xcnt;
474  memcpy(snapshot->xip,
475  builder->committed.xip,
476  builder->committed.xcnt * sizeof(TransactionId));
477 
478  /* sort so we can bsearch() */
479  qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator);
480 
481  /*
482  * Initially, subxip is empty, i.e. it's a snapshot to be used by
483  * transactions that don't modify the catalog. Will be filled by
484  * ReorderBufferCopySnap() if necessary.
485  */
486  snapshot->subxcnt = 0;
487  snapshot->subxip = NULL;
488 
     /* these are the values the free/decref routines later assert on */
489  snapshot->suboverflowed = false;
490  snapshot->takenDuringRecovery = false;
491  snapshot->copied = false;
492  snapshot->curcid = FirstCommandId;
493  snapshot->active_count = 0;
494  snapshot->regd_count = 0;
495 
496  return snapshot;
497 }
498 
499 /*
500  * Export a snapshot so it can be set in another session with SET TRANSACTION
501  * SNAPSHOT.
502  *
503  * For that we need to start a transaction in the current backend as the
504  * importing side checks whether the source transaction is still open to make
505  * sure the xmin horizon hasn't advanced since then.
506  *
507  * After that we convert a locally built snapshot into the normal variant
508  * understood by HeapTupleSatisfiesMVCC et al.
 *
 * Returns the name produced by ExportSnapshot(). Raises ERROR when the
 * builder is not CONSISTENT, when committed.includes_all_transactions is
 * false, or when an export is already in progress.
 *
 * NOTE(review): this listing has dropped listing lines 511 (function name
 * and parameters), 526 and 529 (the conditions guarding two of the elog
 * calls), 538, 540 and 543 (statements around starting the transaction), 557
 * (the allocation assigned to newxip) and 584 (a statement at the end of the
 * xid loop, presumably the one that advances xid). As printed, two elog(ERROR)
 * calls are unconditional and the loop never advances -- restore the missing
 * lines from upstream before using this code.
509  */
510 const char *
512 {
513  Snapshot snap;
514  char *snapname;
515  TransactionId xid;
516  TransactionId *newxip;
517  int newxcnt = 0;
518 
519  if (builder->state != SNAPBUILD_CONSISTENT)
520  elog(ERROR, "cannot export a snapshot before reaching a consistent state");
521 
522  if (!builder->committed.includes_all_transactions)
523  elog(ERROR, "cannot export a snapshot, not all transactions are monitored anymore");
524 
525  /* so we don't overwrite the existing value */
     /* NOTE(review): guarding `if` missing here (listing line 526) */
527  elog(ERROR, "cannot export a snapshot when MyPgXact->xmin already is valid");
528 
     /* NOTE(review): guarding `if` missing here (listing line 529) */
530  elog(ERROR, "cannot export a snapshot from within a transaction");
531 
532  if (SavedResourceOwnerDuringExport)
533  elog(ERROR, "can only export one snapshot at a time");
534 
     /* remember the resource owner and flag the export as in progress */
535  SavedResourceOwnerDuringExport = CurrentResourceOwner;
536  ExportInProgress = true;
537 
     /* NOTE(review): statement missing here (listing line 538) */
539 
     /* NOTE(review): statement missing here (listing line 540) */
541 
542  /* There doesn't seem to be a nice API to set these */
     /* NOTE(review): statement missing here (listing line 543) */
544  XactReadOnly = true;
545 
546  snap = SnapBuildBuildSnapshot(builder, GetTopTransactionId());
547 
548  /*
549  * We know that snap->xmin is alive, enforced by the logical xmin
550  * mechanism. Due to that we can do this without locks, we're only
551  * changing our own value.
552  */
553  MyPgXact->xmin = snap->xmin;
554 
555  /* allocate in transaction context */
556  newxip = (TransactionId *)
     /* NOTE(review): the allocation expression is missing here (listing line 557) */
558 
559  /*
560  * snapbuild.c builds transactions in an "inverted" manner, which means it
561  * stores committed transactions in ->xip, not ones in progress. Build a
562  * classical snapshot by marking all non-committed transactions as
563  * in-progress. This can be expensive.
564  */
565  for (xid = snap->xmin; NormalTransactionIdPrecedes(xid, snap->xmax);)
566  {
567  void *test;
568 
569  /*
570  * Check whether transaction committed using the decoding snapshot
571  * meaning of ->xip.
572  */
573  test = bsearch(&xid, snap->xip, snap->xcnt,
574  sizeof(TransactionId), xidComparator);
575 
     /* not found among the committed xids: record it as in-progress */
576  if (test == NULL)
577  {
578  if (newxcnt >= GetMaxSnapshotXidCount())
579  elog(ERROR, "snapshot too large");
580 
581  newxip[newxcnt++] = xid;
582  }
583 
     /* NOTE(review): statement missing here (listing line 584); the for header has no increment, so xid must be advanced here */
585  }
586 
     /* swap the inverted (committed) xip array for the classical (in-progress) one */
587  snap->xcnt = newxcnt;
588  snap->xip = newxip;
589 
590  /*
591  * now that we've built a plain snapshot, use the normal mechanisms for
592  * exporting it
593  */
594  snapname = ExportSnapshot(snap);
595 
596  ereport(LOG,
597  (errmsg_plural("exported logical decoding snapshot: \"%s\" with %u transaction ID",
598  "exported logical decoding snapshot: \"%s\" with %u transaction IDs",
599  snap->xcnt,
600  snapname, snap->xcnt)));
601  return snapname;
602 }
603 
604 /*
605  * Ensure there is a snapshot and if not build one for current transaction.
 *
 * Returns builder->snapshot, building and caching it first when it is NULL.
 * Only valid once the builder is CONSISTENT (asserted).
 *
 * NOTE(review): listing line 608 (function name and parameter list) and
 * listing line 617 (the refcount increase announced by the comment inside
 * the `if`) are missing -- restore from upstream.
606  */
607 Snapshot
609 {
610  Assert(builder->state == SNAPBUILD_CONSISTENT);
611 
612  /* only build a new snapshot if we don't have a prebuilt one */
613  if (builder->snapshot == NULL)
614  {
615  builder->snapshot = SnapBuildBuildSnapshot(builder, xid);
616  /* increase refcount for the snapshot builder */
     /* NOTE(review): the refcount call is missing here (listing line 617) */
618  }
619 
620  return builder->snapshot;
621 }
622 
623 /*
624  * Reset a previously SnapBuildExportSnapshot()'ed snapshot if there is
625  * any. Aborts the previously started transaction and resets the resource
626  * owner back to its original value.
 *
 * Does nothing unless ExportInProgress is set. Raises ERROR if called
 * outside a transaction while an export is in progress.
 *
 * NOTE(review): listing line 629 (function name and parameter list) and
 * listing lines 639 and 641 (presumably the transaction abort and the
 * resource-owner restore described above) are missing -- restore from
 * upstream.
627  */
628 void
630 {
631  /* nothing exported, that is the usual case */
632  if (!ExportInProgress)
633  return;
634 
635  if (!IsTransactionState())
636  elog(ERROR, "clearing exported snapshot in wrong transaction state");
637 
638  /* make sure nothing could have ever happened */
     /* NOTE(review): statement missing here (listing line 639) */
640 
     /* NOTE(review): statement missing here (listing line 641) */
     /* reset the export bookkeeping so another snapshot can be exported */
642  SavedResourceOwnerDuringExport = NULL;
643  ExportInProgress = false;
644 }
645 
646 /*
647  * Handle the effects of a single heap change, appropriate to the current state
648  * of the snapshot builder and returns whether changes made at (xid, lsn) can
649  * be decoded.
 *
 * Returns false while no full snapshot exists yet, and also for transactions
 * that are still in the builder's "running" set before consistency is
 * reached. Otherwise it makes sure the reorder buffer has a base snapshot for
 * xid and returns true.
 *
 * NOTE(review): listing line 652 (function name and parameter list) and
 * listing lines 681 and 688 (the two refcount increases announced by the
 * comments below) are missing -- restore from upstream.
650  */
651 bool
653 {
654  /*
655  * We can't handle data in transactions if we haven't built a snapshot
656  * yet, so don't store them.
657  */
658  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
659  return false;
660 
661  /*
662  * No point in keeping track of changes in transactions that we don't have
663  * enough information about to decode. This means that they started before
664  * we got into the SNAPBUILD_FULL_SNAPSHOT state.
665  */
666  if (builder->state < SNAPBUILD_CONSISTENT &&
667  SnapBuildTxnIsRunning(builder, xid))
668  return false;
669 
670  /*
671  * If the reorderbuffer doesn't yet have a snapshot, add one now, it will
672  * be needed to decode the change we're currently processing.
673  */
674  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
675  {
676  /* only build a new snapshot if we don't have a prebuilt one */
677  if (builder->snapshot == NULL)
678  {
679  builder->snapshot = SnapBuildBuildSnapshot(builder, xid);
680  /* increase refcount for the snapshot builder */
     /* NOTE(review): the refcount call is missing here (listing line 681) */
682  }
683 
684  /*
685  * Increase refcount for the transaction we're handing the snapshot
686  * out to.
687  */
     /* NOTE(review): the refcount call is missing here (listing line 688) */
689  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
690  builder->snapshot);
691  }
692 
693  return true;
694 }
695 
696 /*
697  * Do CommandId/ComboCid handling after reading an xl_heap_new_cid record.
698  * This implies that a transaction has done some form of write to system
699  * catalogs.
 *
 * Marks xid as catalog-modifying, records the tuple's cmin/cmax/combocid for
 * xlrec->top_xid, and then registers the command id one above the highest
 * valid one found in the record for xid. Raises ERROR if the record carries
 * neither a valid cmin nor a valid cmax.
 *
 * NOTE(review): listing line 702 (function name and the leading parameters,
 * which must include `builder` and `xid` given their use below) is missing
 * -- restore from upstream.
700  */
701 void
703  XLogRecPtr lsn, xl_heap_new_cid *xlrec)
704 {
705  CommandId cid;
706 
707  /*
708  * we only log new_cid's if a catalog tuple was modified, so mark the
709  * transaction as containing catalog modifications
710  */
711  ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
712 
713  ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
714  xlrec->target_node, xlrec->target_tid,
715  xlrec->cmin, xlrec->cmax,
716  xlrec->combocid);
717 
718  /* figure out new command id */
     /* both valid: take the larger; only one valid: take that one */
719  if (xlrec->cmin != InvalidCommandId &&
720  xlrec->cmax != InvalidCommandId)
721  cid = Max(xlrec->cmin, xlrec->cmax);
722  else if (xlrec->cmax != InvalidCommandId)
723  cid = xlrec->cmax;
724  else if (xlrec->cmin != InvalidCommandId)
725  cid = xlrec->cmin;
726  else
727  {
728  cid = InvalidCommandId; /* silence compiler */
729  elog(ERROR, "xl_heap_new_cid record without a valid CommandId");
730  }
731 
     /* the registered command id is one past the one found in the record */
732  ReorderBufferAddNewCommandId(builder->reorder, xid, lsn, cid + 1);
733 }
734 
735 /*
736  * Check whether `xid` is currently 'running'.
737  *
738  * Running transactions in our parlance are transactions which we didn't
739  * observe from the start so we can't properly decode their contents. They
740  * only exist after we freshly started from an < CONSISTENT snapshot.
 *
 * Returns true when xid is found in builder->running.xip by binary search.
 * The search covers running.xcnt_space entries (the allocated size of the
 * array), not running.xcnt, which SnapBuildEndTxn() decrements without
 * removing entries from the array.
 *
 * NOTE(review): listing line 743 (function name and parameter list), listing
 * lines 746-747 (statements after the first Assert) and listing line 751
 * (the last operand of the `if` condition, which as printed ends in a
 * dangling `&&`; presumably an upper-bound test against running.xmax) are
 * missing -- restore from upstream.
741  */
742 static bool
744 {
745  Assert(builder->state < SNAPBUILD_CONSISTENT);
     /* NOTE(review): statements missing here (listing lines 746-747) */
748 
749  if (builder->running.xcnt &&
750  NormalTransactionIdFollows(xid, builder->running.xmin) &&
     /* NOTE(review): final operand of the condition is missing here (listing line 751) */
752  {
753  TransactionId *search =
754  bsearch(&xid, builder->running.xip, builder->running.xcnt_space,
755  sizeof(TransactionId), xidComparator);
756 
757  if (search != NULL)
758  {
759  Assert(*search == xid);
760  return true;
761  }
762  }
763 
764  return false;
765 }
766 
767 /*
768  * Add a new Snapshot to all transactions we're decoding that currently are
769  * in-progress so they can see new catalog contents made by the transaction
770  * that just committed. This is necessary because those in-progress
771  * transactions will use the new catalog's contents from here on (at the very
772  * least everything they do needs to be compatible with newer catalog
773  * contents).
 *
 * Walks builder->reorder->toplevel_by_lsn and queues builder->snapshot, at
 * `lsn`, for every transaction that already has a base snapshot.
 *
 * NOTE(review): listing line 776 (function name and parameter list; the body
 * uses `builder` and `lsn`), listing line 790 (a statement at the top of the
 * loop body) and listing line 812 (the refcount increase announced by the
 * comment before ReorderBufferAddSnapshot) are missing -- restore from
 * upstream.
774  */
775 static void
777 {
778  dlist_iter txn_i;
779  ReorderBufferTXN *txn;
780 
781  /*
782  * Iterate through all toplevel transactions. This can include
783  * subtransactions which we just don't yet know to be that, but that's
784  * fine, they will just get an unnecessary snapshot queued.
785  */
786  dlist_foreach(txn_i, &builder->reorder->toplevel_by_lsn)
787  {
788  txn = dlist_container(ReorderBufferTXN, node, txn_i.cur);
789 
     /* NOTE(review): statement missing here (listing line 790) */
791 
792  /*
793  * If we don't have a base snapshot yet, there are no changes in this
794  * transaction which in turn implies we don't yet need a snapshot at
795  * all. We'll add a snapshot when the first change gets queued.
796  *
797  * NB: This works correctly even for subtransactions because
798  * ReorderBufferCommitChild() takes care to pass the parent the base
799  * snapshot, and while iterating the changequeue we'll get the change
800  * from the subtxn.
801  */
802  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid))
803  continue;
804 
     /* the LSN is printed as two 32-bit halves (high/low) */
805  elog(DEBUG2, "adding a new snapshot to %u at %X/%X",
806  txn->xid, (uint32) (lsn >> 32), (uint32) lsn);
807 
808  /*
809  * increase the snapshot's refcount for the transaction we are handing
810  * it out to
811  */
     /* NOTE(review): the refcount call is missing here (listing line 812) */
813  ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn,
814  builder->snapshot);
815  }
816 }
817 
818 /*
819  * Keep track of a new catalog changing transaction that has committed.
 *
 * Appends xid to builder->committed.xip (unsorted). When the array is full
 * it is first grown to 2 * xcnt_space + 1 entries via repalloc.
 *
 * NOTE(review): listing line 822 (function name and parameter list; callers
 * in this file pass (builder, xid)) and listing line 824 (a statement at the
 * top of the body) are missing -- restore from upstream.
820  */
821 static void
823 {
     /* NOTE(review): statement missing here (listing line 824) */
825 
826  if (builder->committed.xcnt == builder->committed.xcnt_space)
827  {
828  builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1;
829 
830  elog(DEBUG1, "increasing space for committed transactions to %u",
831  (uint32) builder->committed.xcnt_space);
832 
833  builder->committed.xip = repalloc(builder->committed.xip,
834  builder->committed.xcnt_space * sizeof(TransactionId));
835  }
836 
837  /*
838  * TODO: It might make sense to keep the array sorted here instead of
839  * doing it every time we build a new snapshot. On the other hand this
840  * gets called repeatedly when a transaction with subtransactions commits.
841  */
842  builder->committed.xip[builder->committed.xcnt++] = xid;
843 }
844 
845 /*
846  * Remove knowledge about transactions we treat as committed that are smaller
847  * than ->xmin. Those won't ever get checked via the ->committed array but via
848  * the clog machinery, so we don't need to waste memory on them.
 *
 * Survivors keep their relative order. The array is compacted in place via a
 * temporary copy; its allocated size (xcnt_space) is left unchanged. Does
 * nothing while builder->xmin is not yet a normal xid.
 *
 * NOTE(review): listing line 851 (function name and parameter list) is
 * missing; the prototype near the top of the file gives
 * SnapBuildPurgeCommittedTxn(SnapBuild *builder).
849  */
850 static void
852 {
853  int off;
854  TransactionId *workspace;
855  int surviving_xids = 0;
856 
857  /* not ready yet */
858  if (!TransactionIdIsNormal(builder->xmin))
859  return;
860 
861  /* TODO: Neater algorithm than just copying and iterating? */
862  workspace =
863  MemoryContextAlloc(builder->context,
864  builder->committed.xcnt * sizeof(TransactionId));
865 
866  /* copy xids that still are interesting to workspace */
867  for (off = 0; off < builder->committed.xcnt; off++)
868  {
869  if (NormalTransactionIdPrecedes(builder->committed.xip[off],
870  builder->xmin))
871  ; /* remove */
872  else
873  workspace[surviving_xids++] = builder->committed.xip[off];
874  }
875 
876  /* copy workspace back to persistent state */
877  memcpy(builder->committed.xip, workspace,
878  surviving_xids * sizeof(TransactionId));
879 
     /* log before overwriting xcnt so the message shows the old and new counts */
880  elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u",
881  (uint32) builder->committed.xcnt, (uint32) surviving_xids,
882  builder->xmin, builder->xmax);
883  builder->committed.xcnt = surviving_xids;
884 
885  pfree(workspace);
886 }
887 
888 /*
889  * Common logic for SnapBuildAbortTxn and SnapBuildCommitTxn dealing with
890  * keeping track of the amount of running transactions.
 *
 * No-op once the builder is CONSISTENT. Otherwise, if xid is one of the
 * initially running transactions, running.xcnt is decremented; when it hits
 * zero the builder switches to SNAPBUILD_CONSISTENT and logs the consistent
 * point.
 *
 * NOTE(review): listing line 893 (function name and parameter list) is
 * missing; the prototype near the top of the file gives
 * SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid).
891  */
892 static void
894 {
895  if (builder->state == SNAPBUILD_CONSISTENT)
896  return;
897 
898  /*
899  * NB: This handles subtransactions correctly even if we started from
900  * suboverflowed xl_running_xacts because we only keep track of toplevel
901  * transactions. Since the latter are always allocated before their
902  * subxids and since they end at the same time it's sufficient to deal
903  * with them here.
904  */
905  if (SnapBuildTxnIsRunning(builder, xid))
906  {
907  Assert(builder->running.xcnt > 0);
908 
     /* only the counter shrinks; the xid stays in running.xip */
909  if (!--builder->running.xcnt)
910  {
911  /*
912  * None of the originally running transaction is running anymore,
913  * so our incrementally built snapshot now is consistent.
914  */
915  ereport(LOG,
916  (errmsg("logical decoding found consistent point at %X/%X",
917  (uint32) (lsn >> 32), (uint32) lsn),
918  errdetail("Transaction ID %u finished; no more running transactions.",
919  xid)));
920  builder->state = SNAPBUILD_CONSISTENT;
921  }
922  }
923 }
924 
925 /*
926  * Abort a transaction, throw away all state we kept.
 *
 * Runs SnapBuildEndTxn() for each of the nsubxacts entries in subxacts and
 * then for the toplevel xid itself.
 *
 * NOTE(review): listing line 929 (function name and the leading parameters,
 * which must include `builder` and `lsn` given their use below) is missing
 * -- restore from upstream.
927  */
928 void
930  TransactionId xid,
931  int nsubxacts, TransactionId *subxacts)
932 {
933  int i;
934 
     /* subtransactions first, toplevel transaction last */
935  for (i = 0; i < nsubxacts; i++)
936  {
937  TransactionId subxid = subxacts[i];
938 
939  SnapBuildEndTxn(builder, lsn, subxid);
940  }
941 
942  SnapBuildEndTxn(builder, lsn, xid);
943 }
944 
945 /*
946  * Handle everything that needs to be done when a transaction commits
947  */
948 void
950  int nsubxacts, TransactionId *subxacts)
951 {
952  int nxact;
953 
954  bool forced_timetravel = false;
955  bool sub_needs_timetravel = false;
956  bool top_needs_timetravel = false;
957 
958  TransactionId xmax = xid;
959 
960  /*
961  * If we couldn't observe every change of a transaction because it was
962  * already running at the point we started to observe we have to assume it
963  * made catalog changes.
964  *
965  * This has the positive benefit that we afterwards have enough
966  * information to build an exportable snapshot that's usable by pg_dump et
967  * al.
968  */
969  if (builder->state < SNAPBUILD_CONSISTENT)
970  {
971  /* ensure that only commits after this are getting replayed */
972  if (builder->start_decoding_at <= lsn)
973  builder->start_decoding_at = lsn + 1;
974 
975  /*
976  * We could avoid treating !SnapBuildTxnIsRunning transactions as
977  * timetravel ones, but we want to be able to export a snapshot when
978  * we reached consistency.
979  */
980  forced_timetravel = true;
981  elog(DEBUG1, "forced to assume catalog changes for xid %u because it was running too early", xid);
982  }
983 
984  for (nxact = 0; nxact < nsubxacts; nxact++)
985  {
986  TransactionId subxid = subxacts[nxact];
987 
988  /*
989  * make sure txn is not tracked in running txn's anymore, switch state
990  */
991  SnapBuildEndTxn(builder, lsn, subxid);
992 
993  /*
994  * If we're forcing timetravel we also need visibility information
995  * about subtransaction, so keep track of subtransaction's state.
996  */
997  if (forced_timetravel)
998  {
999  SnapBuildAddCommittedTxn(builder, subxid);
1000  if (NormalTransactionIdFollows(subxid, xmax))
1001  xmax = subxid;
1002  }
1003 
1004  /*
1005  * Add subtransaction to base snapshot if it performed DDL; we don't
1006  * distinguish it from toplevel transactions there.
1007  */
1008  else if (ReorderBufferXidHasCatalogChanges(builder->reorder, subxid))
1009  {
1010  sub_needs_timetravel = true;
1011 
1012  elog(DEBUG1, "found subtransaction %u:%u with catalog changes.",
1013  xid, subxid);
1014 
1015  SnapBuildAddCommittedTxn(builder, subxid);
1016 
1017  if (NormalTransactionIdFollows(subxid, xmax))
1018  xmax = subxid;
1019  }
1020  }
1021 
1022  /*
1023  * Make sure toplevel txn is not tracked in running txn's anymore, switch
1024  * state to consistent if possible.
1025  */
1026  SnapBuildEndTxn(builder, lsn, xid);
1027 
1028  if (forced_timetravel)
1029  {
1030  elog(DEBUG2, "forced transaction %u to do timetravel.", xid);
1031 
1032  SnapBuildAddCommittedTxn(builder, xid);
1033  }
1034  /* add toplevel transaction to base snapshot */
1035  else if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
1036  {
1037  elog(DEBUG2, "found top level transaction %u, with catalog changes!",
1038  xid);
1039 
1040  top_needs_timetravel = true;
1041  SnapBuildAddCommittedTxn(builder, xid);
1042  }
1043  else if (sub_needs_timetravel)
1044  {
1045  /* mark toplevel txn as timetravel as well */
1046  SnapBuildAddCommittedTxn(builder, xid);
1047  }
1048 
1049  /* if there's any reason to build a historic snapshot, do so now */
1050  if (forced_timetravel || top_needs_timetravel || sub_needs_timetravel)
1051  {
1052  /*
1053  * Adjust xmax of the snapshot builder, we only do that for committed,
1054  * catalog modifying, transactions, everything else isn't interesting
1055  * for us since we'll never look at the respective rows.
1056  */
1057  if (!TransactionIdIsValid(builder->xmax) ||
1058  TransactionIdFollowsOrEquals(xmax, builder->xmax))
1059  {
1060  builder->xmax = xmax;
1061  TransactionIdAdvance(builder->xmax);
1062  }
1063 
1064  /*
1065  * If we haven't built a complete snapshot yet there's no need to hand
1066  * it out, it wouldn't (and couldn't) be used anyway.
1067  */
1068  if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
1069  return;
1070 
1071  /*
1072  * Decrease the snapshot builder's refcount of the old snapshot, note
1073  * that it still will be used if it has been handed out to the
1074  * reorderbuffer earlier.
1075  */
1076  if (builder->snapshot)
1078 
1079  builder->snapshot = SnapBuildBuildSnapshot(builder, xid);
1080 
1081  /* we might need to execute invalidations, add snapshot */
1082  if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
1083  {
1085  ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
1086  builder->snapshot);
1087  }
1088 
1089  /* refcount of the snapshot builder for the new snapshot */
1091 
1092  /* add a new Snapshot to all currently running transactions */
1094  }
1095  else
1096  {
1097  /* record that we cannot export a general snapshot anymore */
1098  builder->committed.includes_all_transactions = false;
1099  }
1100 }
1101 
1102 
1103 /* -----------------------------------
1104  * Snapshot building functions dealing with xlog records
1105  * -----------------------------------
1106  */
1107 
1108 /*
1109  * Process a running xacts record, and use its information to first build a
1110  * historic snapshot and later to release resources that aren't needed
1111  * anymore.
1112  */
1113 void
1115 {
1116  ReorderBufferTXN *txn;
1117 
1118  /*
1119  * If we're not consistent yet, inspect the record to see whether it
1120  * allows us to get closer to being consistent. If we are consistent, dump
1121  * our snapshot so others or we, after a restart, can use it.
1122  */
1123  if (builder->state < SNAPBUILD_CONSISTENT)
1124  {
1125  /* returns false if there's no point in performing cleanup just yet */
1126  if (!SnapBuildFindSnapshot(builder, lsn, running))
1127  return;
1128  }
1129  else
1130  SnapBuildSerialize(builder, lsn);
1131 
1132  /*
1133  * Update range of interesting xids based on the running xacts
1134  * information. We don't increase ->xmax using it, because once we are in
1135  * a consistent state we can do that ourselves and much more efficiently
1136  * so, because we only need to do it for catalog transactions since we
1137  * only ever look at those.
1138  *
1139  * NB: Because of that xmax can be lower than xmin, because we only
1140  * increase xmax when a catalog modifying transaction commits. While odd
1141  * looking, it's correct and actually more efficient this way since we hit
1142  * fast paths in tqual.c.
1143  */
1144  builder->xmin = running->oldestRunningXid;
1145 
1146  /* Remove transactions we don't need to keep track of anymore */
1147  SnapBuildPurgeCommittedTxn(builder);
1148 
1149  elog(DEBUG3, "xmin: %u, xmax: %u, oldestrunning: %u",
1150  builder->xmin, builder->xmax,
1151  running->oldestRunningXid);
1152 
1153  /*
1154  * Increase shared memory limits, so vacuum can work on tuples we prevented
1155  * from being pruned till now.
1156  */
1158 
1159  /*
1160  * Also tell the slot where we can restart decoding from. We don't want to
1161  * do that after every commit because changing that implies an fsync of
1162  * the logical slot's state file, so we only do it every time we see a
1163  * running xacts record.
1164  *
1165  * Do so by looking for the oldest in progress transaction (determined by
1166  * the first LSN of any of its relevant records). Every transaction
1167  * remembers the last location we stored the snapshot to disk before its
1168  * beginning. That point is where we can restart from.
1169  */
1170 
1171  /*
1172  * Can't know about a serialized snapshot's location if we're not
1173  * consistent.
1174  */
1175  if (builder->state < SNAPBUILD_CONSISTENT)
1176  return;
1177 
1178  txn = ReorderBufferGetOldestTXN(builder->reorder);
1179 
1180  /*
1181  * oldest ongoing txn might have started when we didn't yet serialize
1182  * anything because we hadn't reached a consistent state yet.
1183  */
1184  if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr)
1186 
1187  /*
1188  * No in-progress transaction, can reuse the last serialized snapshot if
1189  * we have one.
1190  */
1191  else if (txn == NULL &&
1195  builder->last_serialized_snapshot);
1196 }
1197 
1198 
1199 /*
1200  * Build the start of a snapshot that's capable of decoding the catalog.
1201  *
1202  * Helper function for SnapBuildProcessRunningXacts() while we're not yet
1203  * consistent.
1204  *
1205  * Returns true if there is a point in performing internal maintenance/cleanup
1206  * using the xl_running_xacts record.
1207  */
1208 static bool
1210 {
1211  /* ---
1212  * Build catalog decoding snapshot incrementally using information about
1213  * the currently running transactions. There are several ways to do that:
1214  *
1215  * a) There were no running transactions when the xl_running_xacts record
1216  * was inserted, jump to CONSISTENT immediately. We might find such a
1217  * state while we were waiting for b) or c).
1218  *
1219  * b) Wait for all toplevel transactions that were running to end. We
1220  * simply track the number of in-progress toplevel transactions and
1221  * lower it whenever one commits or aborts. When that number
1222  * (builder->running.xcnt) reaches zero, we can go from FULL_SNAPSHOT
1223  * to CONSISTENT.
1224  * NB: We need to search running.xip when seeing a transaction's end to
1225  * make sure it's a toplevel transaction and it's been one of the
1226  * initially running ones.
1227  * Interestingly, in contrast to HS, this allows us not to care about
1228  * subtransactions - and by extension suboverflowed xl_running_xacts -
1229  * at all.
1230  *
1231  * c) This (in a previous run) or another decoding slot serialized a
1232  * snapshot to disk that we can use.
1233  * ---
1234  */
1235 
1236  /*
1237  * xl_running_xact record is older than what we can use, we might not have
1238  * all necessary catalog rows anymore.
1239  */
1242  builder->initial_xmin_horizon))
1243  {
1244  ereport(DEBUG1,
1245  (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
1246  (uint32) (lsn >> 32), (uint32) lsn),
1247  errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
1248  builder->initial_xmin_horizon, running->oldestRunningXid)));
1249  return true;
1250  }
1251 
1252  /*
1253  * a) No transactions were running, we can jump to consistent.
1254  *
1255  * NB: We might have already started to incrementally assemble a snapshot,
1256  * so we need to be careful to deal with that.
1257  */
1258  if (running->xcnt == 0)
1259  {
1260  if (builder->start_decoding_at == InvalidXLogRecPtr ||
1261  builder->start_decoding_at <= lsn)
1262  /* can decode everything after this */
1263  builder->start_decoding_at = lsn + 1;
1264 
1265  /* As no transactions were running xmin/xmax can be trivially set. */
1266  builder->xmin = running->nextXid; /* < are finished */
1267  builder->xmax = running->nextXid; /* >= are running */
1268 
1269  /* so we can safely use the faster comparisons */
1270  Assert(TransactionIdIsNormal(builder->xmin));
1271  Assert(TransactionIdIsNormal(builder->xmax));
1272 
1273  /* no transactions running now */
1274  builder->running.xcnt = 0;
1275  builder->running.xmin = InvalidTransactionId;
1276  builder->running.xmax = InvalidTransactionId;
1277 
1278  builder->state = SNAPBUILD_CONSISTENT;
1279 
1280  ereport(LOG,
1281  (errmsg("logical decoding found consistent point at %X/%X",
1282  (uint32) (lsn >> 32), (uint32) lsn),
1283  errdetail("There are no running transactions.")));
1284 
1285  return false;
1286  }
1287  /* c) valid on disk state */
1288  else if (SnapBuildRestore(builder, lsn))
1289  {
1290  /* there won't be any state to cleanup */
1291  return false;
1292  }
1293 
1294  /*
1295  * b) first encounter of a useable xl_running_xacts record. If we had
1296  * found one earlier we would either track running transactions (i.e.
1297  * builder->running.xcnt != 0) or be consistent (this function wouldn't
1298  * get called).
1299  */
1300  else if (!builder->running.xcnt)
1301  {
1302  int off;
1303 
1304  /*
1305  * We only care about toplevel xids as those are the ones we
1306  * definitely see in the wal stream. As snapbuild.c tracks committed
1307  * instead of running transactions we don't need to know anything
1308  * about uncommitted subtransactions.
1309  */
1310 
1311  /*
1312  * Start with an xmin/xmax that's correct for future, when all the
1313  * currently running transactions have finished. We'll update both
1314  * while waiting for the pending transactions to finish.
1315  */
1316  builder->xmin = running->nextXid; /* < are finished */
1317  builder->xmax = running->nextXid; /* >= are running */
1318 
1319  /* so we can safely use the faster comparisons */
1320  Assert(TransactionIdIsNormal(builder->xmin));
1321  Assert(TransactionIdIsNormal(builder->xmax));
1322 
1323  builder->running.xcnt = running->xcnt;
1324  builder->running.xcnt_space = running->xcnt;
1325  builder->running.xip =
1326  MemoryContextAlloc(builder->context,
1327  builder->running.xcnt * sizeof(TransactionId));
1328  memcpy(builder->running.xip, running->xids,
1329  builder->running.xcnt * sizeof(TransactionId));
1330 
1331  /* sort so we can do a binary search */
1332  qsort(builder->running.xip, builder->running.xcnt,
1333  sizeof(TransactionId), xidComparator);
1334 
1335  builder->running.xmin = builder->running.xip[0];
1336  builder->running.xmax = builder->running.xip[running->xcnt - 1];
1337 
1338  /* makes comparisons cheaper later */
1339  TransactionIdRetreat(builder->running.xmin);
1340  TransactionIdAdvance(builder->running.xmax);
1341 
1342  builder->state = SNAPBUILD_FULL_SNAPSHOT;
1343 
1344  ereport(LOG,
1345  (errmsg("logical decoding found initial starting point at %X/%X",
1346  (uint32) (lsn >> 32), (uint32) lsn),
1347  errdetail_plural("%u transaction needs to finish.",
1348  "%u transactions need to finish.",
1349  builder->running.xcnt,
1350  (uint32) builder->running.xcnt)));
1351 
1352  /*
1353  * Iterate through all xids, wait for them to finish.
1354  *
1355  * This isn't required for the correctness of decoding, but to allow
1356  * isolationtester to notice that we're currently waiting for
1357  * something.
1358  */
1359  for (off = 0; off < builder->running.xcnt; off++)
1360  {
1361  TransactionId xid = builder->running.xip[off];
1362 
1363  /*
1364  * Upper layers should prevent that we ever need to wait on
1365  * ourselves. Check anyway, since failing to do so would either
1366  * result in an endless wait or an Assert() failure.
1367  */
1369  elog(ERROR, "waiting for ourselves");
1370 
1372  }
1373 
1374  /* nothing could have built up so far, so don't perform cleanup */
1375  return false;
1376  }
1377 
1378  /*
1379  * We already started to track running xacts and need to wait for all
1380  * in-progress ones to finish. We fall through to the normal processing of
1381  * records so incremental cleanup can be performed.
1382  */
1383  return true;
1384 }
1385 
1386 
1387 /* -----------------------------------
1388  * Snapshot serialization support
1389  * -----------------------------------
1390  */
1391 
1392 /*
1393  * We store current state of struct SnapBuild on disk in the following manner:
1394  *
1395  * struct SnapBuildOnDisk;
1396  * TransactionId * running.xcnt_space;
1397  * TransactionId * committed.xcnt; (*not xcnt_space*)
1398  *
1399  */
1400 typedef struct SnapBuildOnDisk
1401 {
1402  /* first part of this struct needs to be version independent */
1403 
1404  /* data not covered by checksum */
1407 
1408  /* data covered by checksum */
1409 
1410  /* version, in case we want to support pg_upgrade */
1412  /* how large is the on disk data, excluding the constant sized part */
1414 
1415  /* version dependent part */
1417 
1418  /* variable amount of TransactionIds follows */
1419 } SnapBuildOnDisk;
1420 
1421 #define SnapBuildOnDiskConstantSize \
1422  offsetof(SnapBuildOnDisk, builder)
1423 #define SnapBuildOnDiskNotChecksummedSize \
1424  offsetof(SnapBuildOnDisk, version)
1425 
1426 #define SNAPBUILD_MAGIC 0x51A1E001
1427 #define SNAPBUILD_VERSION 2
1428 
1429 /*
1430  * Store/Load a snapshot from disk, depending on the snapshot builder's state.
1431  *
1432  * Supposed to be used by external (i.e. not snapbuild.c) code that just read
1433  * a record that's a potential location for a serialized snapshot.
1434  */
1435 void
1437 {
1438  if (builder->state < SNAPBUILD_CONSISTENT)
1439  SnapBuildRestore(builder, lsn);
1440  else
1441  SnapBuildSerialize(builder, lsn);
1442 }
1443 
1444 /*
1445  * Serialize the snapshot 'builder' at the location 'lsn' if it hasn't already
1446  * been done by another decoding process.
1447  */
1448 static void
1450 {
1451  Size needed_length;
1452  SnapBuildOnDisk *ondisk;
1453  char *ondisk_c;
1454  int fd;
1455  char tmppath[MAXPGPATH];
1456  char path[MAXPGPATH];
1457  int ret;
1458  struct stat stat_buf;
1459  Size sz;
1460 
1461  Assert(lsn != InvalidXLogRecPtr);
1463  builder->last_serialized_snapshot <= lsn);
1464 
1465  /*
1466  * no point in serializing if we cannot continue to work immediately after
1467  * restoring the snapshot
1468  */
1469  if (builder->state < SNAPBUILD_CONSISTENT)
1470  return;
1471 
1472  /*
1473  * We identify snapshots by the LSN they are valid for. We don't need to
1474  * include timelines in the name as each LSN maps to exactly one timeline
1475  * unless the user used pg_resetwal or similar. If a user did so, there's
1476  * no hope continuing to decode anyway.
1477  */
1478  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1479  (uint32) (lsn >> 32), (uint32) lsn);
1480 
1481  /*
1482  * first check whether some other backend already has written the snapshot
1483  * for this LSN. It's perfectly fine if there's none, so we accept ENOENT
1484  * as a valid state. Everything else is an unexpected error.
1485  */
1486  ret = stat(path, &stat_buf);
1487 
1488  if (ret != 0 && errno != ENOENT)
1489  ereport(ERROR,
1490  (errmsg("could not stat file \"%s\": %m", path)));
1491 
1492  else if (ret == 0)
1493  {
1494  /*
1495  * somebody else has already serialized to this point, don't overwrite
1496  * but remember location, so we don't need to read old data again.
1497  *
1498  * To be sure it has been synced to disk after the rename() from the
1499  * tempfile filename to the real filename, we just repeat the fsync.
1500  * That ought to be cheap because in most scenarios it should already
1501  * be safely on disk.
1502  */
1503  fsync_fname(path, false);
1504  fsync_fname("pg_logical/snapshots", true);
1505 
1506  builder->last_serialized_snapshot = lsn;
1507  goto out;
1508  }
1509 
1510  /*
1511  * there is an obvious race condition here between the time we stat(2) the
1512  * file and us writing the file. But we rename the file into place
1513  * atomically and all files created need to contain the same data anyway,
1514  * so this is perfectly fine, although a bit of a resource waste. Locking
1515  * seems like pointless complication.
1516  */
1517  elog(DEBUG1, "serializing snapshot to %s", path);
1518 
1519  /* to make sure only we will write to this tempfile, include pid */
1520  sprintf(tmppath, "pg_logical/snapshots/%X-%X.snap.%u.tmp",
1521  (uint32) (lsn >> 32), (uint32) lsn, MyProcPid);
1522 
1523  /*
1524  * Unlink temporary file if it already exists; it must be a leftover from
1525  * before a crash/error, since we won't enter this function twice from
1526  * within a single decoding slot/backend and the temporary file contains
1527  * the pid of the current process.
1528  */
1529  if (unlink(tmppath) != 0 && errno != ENOENT)
1530  ereport(ERROR,
1532  errmsg("could not remove file \"%s\": %m", path)));
1533 
1534  needed_length = sizeof(SnapBuildOnDisk) +
1535  sizeof(TransactionId) * builder->running.xcnt_space +
1536  sizeof(TransactionId) * builder->committed.xcnt;
1537 
1538  ondisk_c = MemoryContextAllocZero(builder->context, needed_length);
1539  ondisk = (SnapBuildOnDisk *) ondisk_c;
1540  ondisk->magic = SNAPBUILD_MAGIC;
1541  ondisk->version = SNAPBUILD_VERSION;
1542  ondisk->length = needed_length;
1543  INIT_CRC32C(ondisk->checksum);
1544  COMP_CRC32C(ondisk->checksum,
1545  ((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
1547  ondisk_c += sizeof(SnapBuildOnDisk);
1548 
1549  memcpy(&ondisk->builder, builder, sizeof(SnapBuild));
1550  /* NULL-ify memory-only data */
1551  ondisk->builder.context = NULL;
1552  ondisk->builder.snapshot = NULL;
1553  ondisk->builder.reorder = NULL;
1554  ondisk->builder.running.xip = NULL;
1555  ondisk->builder.committed.xip = NULL;
1556 
1557  COMP_CRC32C(ondisk->checksum,
1558  &ondisk->builder,
1559  sizeof(SnapBuild));
1560 
1561  /* copy running xacts */
1562  sz = sizeof(TransactionId) * builder->running.xcnt_space;
1563  memcpy(ondisk_c, builder->running.xip, sz);
1564  COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
1565  ondisk_c += sz;
1566 
1567  /* copy committed xacts */
1568  sz = sizeof(TransactionId) * builder->committed.xcnt;
1569  memcpy(ondisk_c, builder->committed.xip, sz);
1570  COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
1571  ondisk_c += sz;
1572 
1573  FIN_CRC32C(ondisk->checksum);
1574 
1575  /* we have valid data now, open tempfile and write it there */
1576  fd = OpenTransientFile(tmppath,
1577  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
1578  S_IRUSR | S_IWUSR);
1579  if (fd < 0)
1580  ereport(ERROR,
1581  (errmsg("could not open file \"%s\": %m", path)));
1582 
1583  if ((write(fd, ondisk, needed_length)) != needed_length)
1584  {
1585  CloseTransientFile(fd);
1586  ereport(ERROR,
1588  errmsg("could not write to file \"%s\": %m", tmppath)));
1589  }
1590 
1591  /*
1592  * fsync the file before renaming so that even if we crash after this we
1593  * have either a fully valid file or nothing.
1594  *
1595  * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has
1596  * some noticeable overhead since it's performed synchronously during
1597  * decoding?
1598  */
1599  if (pg_fsync(fd) != 0)
1600  {
1601  CloseTransientFile(fd);
1602  ereport(ERROR,
1604  errmsg("could not fsync file \"%s\": %m", tmppath)));
1605  }
1606  CloseTransientFile(fd);
1607 
1608  fsync_fname("pg_logical/snapshots", true);
1609 
1610  /*
1611  * We may overwrite the work from some other backend, but that's ok, our
1612  * snapshot is valid as well, we'll just have done some superfluous work.
1613  */
1614  if (rename(tmppath, path) != 0)
1615  {
1616  ereport(ERROR,
1618  errmsg("could not rename file \"%s\" to \"%s\": %m",
1619  tmppath, path)));
1620  }
1621 
1622  /* make sure we persist */
1623  fsync_fname(path, false);
1624  fsync_fname("pg_logical/snapshots", true);
1625 
1626  /*
1627  * Now there's no way we can lose the dumped state anymore, remember this
1628  * as a serialization point.
1629  */
1630  builder->last_serialized_snapshot = lsn;
1631 
1632 out:
1634  builder->last_serialized_snapshot);
1635 }
1636 
1637 /*
1638  * Restore a snapshot into 'builder' if previously one has been stored at the
1639  * location indicated by 'lsn'. Returns true if successful, false otherwise.
1640  */
1641 static bool
1643 {
1644  SnapBuildOnDisk ondisk;
1645  int fd;
1646  char path[MAXPGPATH];
1647  Size sz;
1648  int readBytes;
1649  pg_crc32c checksum;
1650 
1651  /* no point in loading a snapshot if we're already there */
1652  if (builder->state == SNAPBUILD_CONSISTENT)
1653  return false;
1654 
1655  sprintf(path, "pg_logical/snapshots/%X-%X.snap",
1656  (uint32) (lsn >> 32), (uint32) lsn);
1657 
1658  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
1659 
1660  if (fd < 0 && errno == ENOENT)
1661  return false;
1662  else if (fd < 0)
1663  ereport(ERROR,
1665  errmsg("could not open file \"%s\": %m", path)));
1666 
1667  /* ----
1668  * Make sure the snapshot had been stored safely to disk, that's normally
1669  * cheap.
1670  * Note that we do not need PANIC here, nobody will be able to use the
1671  * slot without fsyncing, and saving it won't succeed without an fsync()
1672  * either...
1673  * ----
1674  */
1675  fsync_fname(path, false);
1676  fsync_fname("pg_logical/snapshots", true);
1677 
1678 
1679  /* read statically sized portion of snapshot */
1680  readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
1681  if (readBytes != SnapBuildOnDiskConstantSize)
1682  {
1683  CloseTransientFile(fd);
1684  ereport(ERROR,
1686  errmsg("could not read file \"%s\", read %d of %d: %m",
1687  path, readBytes, (int) SnapBuildOnDiskConstantSize)));
1688  }
1689 
1690  if (ondisk.magic != SNAPBUILD_MAGIC)
1691  ereport(ERROR,
1692  (errmsg("snapbuild state file \"%s\" has wrong magic number: %u instead of %u",
1693  path, ondisk.magic, SNAPBUILD_MAGIC)));
1694 
1695  if (ondisk.version != SNAPBUILD_VERSION)
1696  ereport(ERROR,
1697  (errmsg("snapbuild state file \"%s\" has unsupported version: %u instead of %u",
1698  path, ondisk.version, SNAPBUILD_VERSION)));
1699 
1700  INIT_CRC32C(checksum);
1701  COMP_CRC32C(checksum,
1702  ((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize,
1704 
1705  /* read SnapBuild */
1706  readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
1707  if (readBytes != sizeof(SnapBuild))
1708  {
1709  CloseTransientFile(fd);
1710  ereport(ERROR,
1712  errmsg("could not read file \"%s\", read %d of %d: %m",
1713  path, readBytes, (int) sizeof(SnapBuild))));
1714  }
1715  COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
1716 
1717  /* restore running xacts information */
1718  sz = sizeof(TransactionId) * ondisk.builder.running.xcnt_space;
1719  ondisk.builder.running.xip = MemoryContextAllocZero(builder->context, sz);
1720  readBytes = read(fd, ondisk.builder.running.xip, sz);
1721  if (readBytes != sz)
1722  {
1723  CloseTransientFile(fd);
1724  ereport(ERROR,
1726  errmsg("could not read file \"%s\", read %d of %d: %m",
1727  path, readBytes, (int) sz)));
1728  }
1729  COMP_CRC32C(checksum, ondisk.builder.running.xip, sz);
1730 
1731  /* restore committed xacts information */
1732  sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
1733  ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
1734  readBytes = read(fd, ondisk.builder.committed.xip, sz);
1735  if (readBytes != sz)
1736  {
1737  CloseTransientFile(fd);
1738  ereport(ERROR,
1740  errmsg("could not read file \"%s\", read %d of %d: %m",
1741  path, readBytes, (int) sz)));
1742  }
1743  COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
1744 
1745  CloseTransientFile(fd);
1746 
1747  FIN_CRC32C(checksum);
1748 
1749  /* verify checksum of what we've read */
1750  if (!EQ_CRC32C(checksum, ondisk.checksum))
1751  ereport(ERROR,
1753  errmsg("checksum mismatch for snapbuild state file \"%s\": is %u, should be %u",
1754  path, checksum, ondisk.checksum)));
1755 
1756  /*
1757  * ok, we now have a sensible snapshot here, figure out if it has more
1758  * information than we have.
1759  */
1760 
1761  /*
1762  * We are only interested in consistent snapshots for now, comparing
1763  * whether one incomplete snapshot is more "advanced" seems to be
1764  * unnecessarily complex.
1765  */
1766  if (ondisk.builder.state < SNAPBUILD_CONSISTENT)
1767  goto snapshot_not_interesting;
1768 
1769  /*
1770  * Don't use a snapshot that requires an xmin that we cannot guarantee to
1771  * be available.
1772  */
1774  goto snapshot_not_interesting;
1775 
1776 
1777  /* ok, we think the snapshot is sensible, copy over everything important */
1778  builder->xmin = ondisk.builder.xmin;
1779  builder->xmax = ondisk.builder.xmax;
1780  builder->state = ondisk.builder.state;
1781 
1782  builder->committed.xcnt = ondisk.builder.committed.xcnt;
1783  /* We only allocated/stored xcnt, not xcnt_space xids ! */
1784  /* don't overwrite preallocated xip, if we don't have anything here */
1785  if (builder->committed.xcnt > 0)
1786  {
1787  pfree(builder->committed.xip);
1788  builder->committed.xcnt_space = ondisk.builder.committed.xcnt;
1789  builder->committed.xip = ondisk.builder.committed.xip;
1790  }
1791  ondisk.builder.committed.xip = NULL;
1792 
1793  builder->running.xcnt = ondisk.builder.running.xcnt;
1794  if (builder->running.xip)
1795  pfree(builder->running.xip);
1796  builder->running.xcnt_space = ondisk.builder.running.xcnt_space;
1797  builder->running.xip = ondisk.builder.running.xip;
1798 
1799  /* our snapshot is not interesting anymore, build a new one */
1800  if (builder->snapshot != NULL)
1801  {
1803  }
1806 
1807  ReorderBufferSetRestartPoint(builder->reorder, lsn);
1808 
1809  Assert(builder->state == SNAPBUILD_CONSISTENT);
1810 
1811  ereport(LOG,
1812  (errmsg("logical decoding found consistent point at %X/%X",
1813  (uint32) (lsn >> 32), (uint32) lsn),
1814  errdetail("Logical decoding will begin using saved snapshot.")));
1815  return true;
1816 
1817 snapshot_not_interesting:
1818  if (ondisk.builder.running.xip != NULL)
1819  pfree(ondisk.builder.running.xip);
1820  if (ondisk.builder.committed.xip != NULL)
1821  pfree(ondisk.builder.committed.xip);
1822  return false;
1823 }
1824 
1825 /*
1826  * Remove all serialized snapshots that are not required anymore because no
1827  * slot can need them. This doesn't actually have to run during a checkpoint,
1828  * but it's a convenient point to schedule this.
1829  *
1830  * NB: We run this during checkpoints even if logical decoding is disabled so
1831  * we cleanup old slots at some point after it got disabled.
1832  */
1833 void
1835 {
1836  XLogRecPtr cutoff;
1837  XLogRecPtr redo;
1838  DIR *snap_dir;
1839  struct dirent *snap_de;
1840  char path[MAXPGPATH];
1841 
1842  /*
1843  * We start off with a minimum of the last redo pointer. No new replication
1844  * slot will start before that, so that's a safe upper bound for removal.
1845  */
1846  redo = GetRedoRecPtr();
1847 
1848  /* now check for the restart ptrs from existing slots */
1850 
1851  /* don't start earlier than the restart lsn */
1852  if (redo < cutoff)
1853  cutoff = redo;
1854 
1855  snap_dir = AllocateDir("pg_logical/snapshots");
1856  while ((snap_de = ReadDir(snap_dir, "pg_logical/snapshots")) != NULL)
1857  {
1858  uint32 hi;
1859  uint32 lo;
1860  XLogRecPtr lsn;
1861  struct stat statbuf;
1862 
1863  if (strcmp(snap_de->d_name, ".") == 0 ||
1864  strcmp(snap_de->d_name, "..") == 0)
1865  continue;
1866 
1867  snprintf(path, MAXPGPATH, "pg_logical/snapshots/%s", snap_de->d_name);
1868 
1869  if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
1870  {
1871  elog(DEBUG1, "only regular files expected: %s", path);
1872  continue;
1873  }
1874 
1875  /*
1876  * temporary filenames from SnapBuildSerialize() include the LSN and
1877  * everything but are postfixed by .$pid.tmp. We can just remove them
1878  * the same as other files because there can be none that are
1879  * currently being written that are older than cutoff.
1880  *
1881  * We just log a message if a file doesn't fit the pattern, it's
1882  * probably some editors lock/state file or similar...
1883  */
1884  if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2)
1885  {
1886  ereport(LOG,
1887  (errmsg("could not parse file name \"%s\"", path)));
1888  continue;
1889  }
1890 
1891  lsn = ((uint64) hi) << 32 | lo;
1892 
1893  /* check whether we still need it */
1894  if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
1895  {
1896  elog(DEBUG1, "removing snapbuild snapshot %s", path);
1897 
1898  /*
1899  * It's not particularly harmful, though strange, if we can't
1900  * remove the file here. Don't prevent the checkpoint from
1901  * completing, that'd be a cure worse than the disease.
1902  */
1903  if (unlink(path) < 0)
1904  {
1905  ereport(LOG,
1907  errmsg("could not remove file \"%s\": %m",
1908  path)));
1909  continue;
1910  }
1911  }
1912  }
1913  FreeDir(snap_dir);
1914 }
#define TransactionIdAdvance(dest)
Definition: transam.h:48
static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1642
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
uint32 CommandId
Definition: c.h:408
void AbortCurrentTransaction(void)
Definition: xact.c:2984
SnapshotSatisfiesFunc satisfies
Definition: snapshot.h:54
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:200
#define DEBUG1
Definition: elog.h:25
int MyProcPid
Definition: globals.c:38
static void SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:822
#define SNAPBUILD_VERSION
Definition: snapbuild.c:1427
static void test(void)
uint32 TransactionId
Definition: c.h:394
bool copied
Definition: snapshot.h:94
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition: snapbuild.c:372
#define SNAPBUILD_MAGIC
Definition: snapbuild.c:1426
void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, int nsubxacts, TransactionId *subxacts)
Definition: snapbuild.c:949
#define DEBUG3
Definition: elog.h:23
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition: snapbuild.c:363
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:772
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:850
#define write(a, b, c)
Definition: win32.h:19
TransactionId xmin
Definition: proc.h:203
CommandId combocid
Definition: heapam_xlog.h:348
pg_crc32c checksum
Definition: snapbuild.c:1406
Snapshot snapshot
Definition: snapbuild.c:169
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
uint32 pg_crc32c
Definition: pg_crc32c.h:38
void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
Definition: logical.c:815
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
XLogRecPtr current_restart_decoding_lsn
#define XACT_REPEATABLE_READ
Definition: xact.h:30
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:567
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
CommandId cmax
Definition: heapam_xlog.h:341
bool IsTransactionOrTransactionBlock(void)
Definition: xact.c:4320
size_t xcnt_space
Definition: snapbuild.c:199
ItemPointerData target_tid
Definition: heapam_xlog.h:354
char * ExportSnapshot(Snapshot snapshot)
Definition: snapmgr.c:1150
bool suboverflowed
Definition: snapshot.h:91
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileNode node, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1209
size_t xcnt
Definition: snapbuild.c:198
TransactionId GetTopTransactionId(void)
Definition: xact.c:388
#define LOG
Definition: elog.h:26
#define TransactionIdRetreat(dest)
Definition: transam.h:56
Definition: dirent.h:9
uint32 regd_count
Definition: snapshot.h:108
static void SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid)
Definition: snapbuild.c:893
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1038
static ResourceOwner SavedResourceOwnerDuringExport
Definition: snapbuild.c:244
PGXACT * MyPgXact
Definition: proc.c:68
#define FirstCommandId
Definition: c.h:410
int errdetail_internal(const char *fmt,...)
Definition: elog.c:900
bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
Definition: snapbuild.c:652
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
TransactionId xids[FLEXIBLE_ARRAY_MEMBER]
Definition: standbydefs.h:56
const char * SnapBuildExportSnapshot(SnapBuild *builder)
Definition: snapbuild.c:511
SnapBuildState state
Definition: snapbuild.c:143
static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1449
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:992
void SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, int nsubxacts, TransactionId *subxacts)
Definition: snapbuild.c:929
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
Definition: lmgr.h:26
struct SnapBuild::@22 running
bool FirstSnapshotSet
Definition: snapmgr.c:203
#define MAXPGPATH
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:145
static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:776
static bool ExportInProgress
Definition: snapbuild.c:245
#define DEBUG2
Definition: elog.h:24
SnapBuild builder
Definition: snapbuild.c:1416
ReorderBuffer * reorder
Definition: snapbuild.c:179
TransactionId initial_xmin_horizon
Definition: snapbuild.c:164
TransactionId * xip
Definition: snapbuild.c:200
void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn, xl_heap_new_cid *xlrec)
Definition: snapbuild.c:702
static void SnapBuildFreeSnapshot(Snapshot snap)
Definition: snapbuild.c:338
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2093
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:707
struct SnapshotData SnapshotData
bool includes_all_transactions
Definition: snapbuild.c:220
#define InvalidTransactionId
Definition: transam.h:31
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1834
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
unsigned int uint32
Definition: c.h:265
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2284
void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
Definition: snapbuild.c:1114
TransactionId xmax
Definition: snapshot.h:67
TransactionId xmin
Definition: snapshot.h:66
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
SnapBuildState
Definition: snapbuild.h:18
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:428
XLogRecPtr last_serialized_snapshot
Definition: snapbuild.c:174
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId * xip
Definition: snapshot.h:77
int CloseTransientFile(int fd)
Definition: fd.c:2254
TransactionId xmax
Definition: snapbuild.c:152
RelFileNode target_node
Definition: heapam_xlog.h:353
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:440
void * palloc0(Size size)
Definition: mcxt.c:920
bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
Definition: tqual.c:1652
#define InvalidCommandId
Definition: c.h:411
dlist_head toplevel_by_lsn
TransactionId xid
dlist_node * cur
Definition: ilist.h:161
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:784
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
CommandId curcid
Definition: snapshot.h:96
#define SnapBuildOnDiskConstantSize
Definition: snapbuild.c:1421
static bool SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:743
int GetMaxSnapshotXidCount(void)
Definition: procarray.c:1451
bool XactReadOnly
Definition: xact.c:77
void FreeSnapshotBuilder(SnapBuild *builder)
Definition: snapbuild.c:319
SnapBuild * AllocateSnapshotBuilder(ReorderBuffer *reorder, TransactionId xmin_horizon, XLogRecPtr start_lsn)
Definition: snapbuild.c:280
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
#define Max(x, y)
Definition: c.h:796
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:554
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2350
void LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
Definition: logical.c:758
void StartTransactionCommand(void)
Definition: xact.c:2675
#define SnapBuildOnDiskNotChecksummedSize
Definition: snapbuild.c:1423
bool takenDuringRecovery
Definition: snapshot.h:93
#define NormalTransactionIdFollows(id1, id2)
Definition: transam.h:67
size_t Size
Definition: c.h:353
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:965
int XactIsoLevel
Definition: xact.c:74
bool IsTransactionState(void)
Definition: xact.c:349
MemoryContext context
Definition: snapbuild.c:146
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:8095
TransactionId nextXid
Definition: standbydefs.h:52
#define NormalTransactionIdPrecedes(id1, id2)
Definition: transam.h:62
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition: snapbuild.c:396
uint32 xcnt
Definition: snapshot.h:78
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
TransactionId xmin
Definition: snapbuild.c:149
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
int i
void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn)
Definition: snapbuild.c:1436
XLogRecPtr restart_decoding_lsn
TransactionId oldestRunningXid
Definition: standbydefs.h:53
struct SnapBuild::@23 committed
CommandId cmin
Definition: heapam_xlog.h:340
int pg_fsync(int fd)
Definition: fd.c:333
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
char d_name[MAX_PATH]
Definition: dirent.h:14
struct SnapBuildOnDisk SnapBuildOnDisk
#define elog
Definition: elog.h:219
#define qsort(a, b, c, d)
Definition: port.h:440
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:73
static void SnapBuildSnapIncRefcount(Snapshot snap)
Definition: snapbuild.c:384
#define lstat(path, sb)
Definition: win32.h:272
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:78
XLogRecPtr start_decoding_at
Definition: snapbuild.c:158
static void SnapBuildPurgeCommittedTxn(SnapBuild *builder)
Definition: snapbuild.c:851
#define read(a, b, c)
Definition: win32.h:18
int FreeDir(DIR *dir)
Definition: fd.c:2393
void SnapBuildClearExportedSnapshot(void)
Definition: snapbuild.c:629
TransactionId * subxip
Definition: snapshot.h:89
uint32 active_count
Definition: snapshot.h:107
int xidComparator(const void *arg1, const void *arg2)
Definition: xid.c:138
int32 subxcnt
Definition: snapshot.h:90
TransactionId top_xid
Definition: heapam_xlog.h:339
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
Snapshot SnapBuildGetOrBuildSnapshot(SnapBuild *builder, TransactionId xid)
Definition: snapbuild.c:608