PostgreSQL Source Code git master
Loading...
Searching...
No Matches
reorderbuffer.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <sys/stat.h>
#include "access/detoast.h"
#include "access/heapam.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "common/int.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"
#include "utils/wait_event.h"
Include dependency graph for reorderbuffer.c:

Go to the source code of this file.

Data Structures

struct  ReorderBufferTXNByIdEnt
 
struct  ReorderBufferTupleCidKey
 
struct  ReorderBufferTupleCidEnt
 
struct  TXNEntryFile
 
struct  ReorderBufferIterTXNEntry
 
struct  ReorderBufferIterTXNState
 
struct  ReorderBufferToastEnt
 
struct  ReorderBufferDiskChange
 
struct  RewriteMappingFile
 

Macros

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
 
#define IsSpecInsert(action)
 
#define IsSpecConfirmOrAbort(action)
 
#define IsInsertOrUpdate(action)
 
#define CHANGES_THRESHOLD   100
 

Typedefs

typedef struct ReorderBufferTXNByIdEnt ReorderBufferTXNByIdEnt
 
typedef struct ReorderBufferTupleCidKey ReorderBufferTupleCidKey
 
typedef struct ReorderBufferTupleCidEnt ReorderBufferTupleCidEnt
 
typedef struct TXNEntryFile TXNEntryFile
 
typedef struct ReorderBufferIterTXNEntry ReorderBufferIterTXNEntry
 
typedef struct ReorderBufferIterTXNState ReorderBufferIterTXNState
 
typedef struct ReorderBufferToastEnt ReorderBufferToastEnt
 
typedef struct ReorderBufferDiskChange ReorderBufferDiskChange
 
typedef struct RewriteMappingFile RewriteMappingFile
 

Functions

static ReorderBufferTXN * ReorderBufferAllocTXN (ReorderBuffer *rb)
 
static void ReorderBufferFreeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static ReorderBufferTXN * ReorderBufferTXNByXid (ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
 
static void ReorderBufferTransferSnapToParent (ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
 
static void AssertTXNLsnOrder (ReorderBuffer *rb)
 
static void ReorderBufferIterTXNInit (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
 
static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferIterTXNFinish (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferExecuteInvalidations (uint32 nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferCheckMemoryLimit (ReorderBuffer *rb)
 
static void ReorderBufferSerializeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferSerializeChange (ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
 
static Size ReorderBufferRestoreChanges (ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
 
static void ReorderBufferRestoreChange (ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
 
static void ReorderBufferRestoreCleanup (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferTruncateTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
 
static void ReorderBufferMaybeMarkTXNStreamed (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static bool ReorderBufferCheckAndTruncateAbortedTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferCleanupSerializedTXNs (const char *slotname)
 
static void ReorderBufferSerializedPath (char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
 
static int ReorderBufferTXNSizeCompare (const pairingheap_node *a, const pairingheap_node *b, void *arg)
 
static void ReorderBufferFreeSnap (ReorderBuffer *rb, Snapshot snap)
 
static Snapshot ReorderBufferCopySnap (ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
 
static bool ReorderBufferCanStream (ReorderBuffer *rb)
 
static bool ReorderBufferCanStartStreaming (ReorderBuffer *rb)
 
static void ReorderBufferStreamTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferStreamCommit (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastInitHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReset (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReplace (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static void ReorderBufferToastAppendChunk (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static Size ReorderBufferChangeSize (ReorderBufferChange *change)
 
static void ReorderBufferChangeMemoryUpdate (ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
 
ReorderBuffer * ReorderBufferAllocate (void)
 
void ReorderBufferFree (ReorderBuffer *rb)
 
ReorderBufferChange * ReorderBufferAllocChange (ReorderBuffer *rb)
 
void ReorderBufferFreeChange (ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
 
HeapTuple ReorderBufferAllocTupleBuf (ReorderBuffer *rb, Size tuple_len)
 
void ReorderBufferFreeTupleBuf (HeapTuple tuple)
 
Oid * ReorderBufferAllocRelids (ReorderBuffer *rb, int nrelids)
 
void ReorderBufferFreeRelids (ReorderBuffer *rb, Oid *relids)
 
static void ReorderBufferProcessPartialChange (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueChange (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueMessage (ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
 
static void AssertChangeLsnOrder (ReorderBufferTXN *txn)
 
ReorderBufferTXN * ReorderBufferGetOldestTXN (ReorderBuffer *rb)
 
TransactionId ReorderBufferGetOldestXmin (ReorderBuffer *rb)
 
void ReorderBufferSetRestartPoint (ReorderBuffer *rb, XLogRecPtr ptr)
 
void ReorderBufferAssignChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
 
void ReorderBufferCommitChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
 
static int ReorderBufferIterCompare (Datum a, Datum b, void *arg)
 
static void ReorderBufferCleanupTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferBuildTupleCidHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void SetupCheckXidLive (TransactionId xid)
 
static void ReorderBufferApplyChange (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyTruncate (ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyMessage (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferSaveTXNSnapshot (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
 
static void ReorderBufferResetTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn)
 
static void ReorderBufferProcessTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
 
static void ReorderBufferReplay (ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferCommit (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
bool ReorderBufferRememberPrepareInfo (ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferSkipPrepare (ReorderBuffer *rb, TransactionId xid)
 
void ReorderBufferPrepare (ReorderBuffer *rb, TransactionId xid, char *gid)
 
void ReorderBufferFinishPrepared (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
 
void ReorderBufferAbort (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
 
void ReorderBufferAbortOld (ReorderBuffer *rb, TransactionId oldestRunningXid)
 
void ReorderBufferForget (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferInvalidate (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferImmediateInvalidation (ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
 
void ReorderBufferProcessXid (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferAddSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferSetBaseSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferAddNewCommandId (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
 
void ReorderBufferAddNewTupleCids (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
 
static void ReorderBufferQueueInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferAccumulateInvalidations (SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
 
void ReorderBufferAddInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferAddDistributedInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferXidSetCatalogChanges (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
TransactionId * ReorderBufferGetCatalogChangesXacts (ReorderBuffer *rb)
 
bool ReorderBufferXidHasCatalogChanges (ReorderBuffer *rb, TransactionId xid)
 
bool ReorderBufferXidHasBaseSnapshot (ReorderBuffer *rb, TransactionId xid)
 
static void ReorderBufferSerializeReserve (ReorderBuffer *rb, Size sz)
 
static ReorderBufferTXN * ReorderBufferLargestTXN (ReorderBuffer *rb)
 
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN (ReorderBuffer *rb)
 
void StartupReorderBuffer (void)
 
static void ApplyLogicalMappingFile (HTAB *tuplecid_data, const char *fname)
 
static bool TransactionIdInArray (TransactionId xid, TransactionId *xip, Size num)
 
static int file_sort_by_lsn (const ListCell *a_p, const ListCell *b_p)
 
static void UpdateLogicalMappings (HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
 
bool ResolveCminCmaxDuringDecoding (HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
uint32 ReorderBufferGetInvalidations (ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
 

Variables

int logical_decoding_work_mem
 
static const Size max_changes_in_memory = 4096
 
int debug_logical_replication_streaming = DEBUG_LOGICAL_REP_STREAMING_BUFFERED
 

Macro Definition Documentation

◆ CHANGES_THRESHOLD

#define CHANGES_THRESHOLD   100

◆ IsInsertOrUpdate

#define IsInsertOrUpdate (   action)
Value:
( \
(((action) == REORDER_BUFFER_CHANGE_INSERT) || \
((action) == REORDER_BUFFER_CHANGE_UPDATE) || \
((action) == REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT)) \
)
@ REORDER_BUFFER_CHANGE_INSERT
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT
@ REORDER_BUFFER_CHANGE_UPDATE

Definition at line 207 of file reorderbuffer.c.

325{
326 ReorderBuffer *buffer;
329
331
332 /* allocate memory in own context, to have better accountability */
334 "ReorderBuffer",
336
337 buffer =
339
340 memset(&hash_ctl, 0, sizeof(hash_ctl));
341
342 buffer->context = new_ctx;
343
345 "Change",
347 sizeof(ReorderBufferChange));
348
350 "TXN",
352 sizeof(ReorderBufferTXN));
353
354 /*
355 * To minimize memory fragmentation caused by long-running transactions
356 * with changes spanning multiple memory blocks, we use a single
357 * fixed-size memory block for decoded tuple storage. The performance
358 * testing showed that the default memory block size maintains logical
359 * decoding performance without causing fragmentation due to concurrent
360 * transactions. One might think that we can use the max size as
361 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
362 * the memory fragmentation.
363 */
365 "Tuples",
369
370 hash_ctl.keysize = sizeof(TransactionId);
371 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
372 hash_ctl.hcxt = buffer->context;
373
374 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
376
378 buffer->by_txn_last_txn = NULL;
379
380 buffer->outbuf = NULL;
381 buffer->outbufsize = 0;
382 buffer->size = 0;
383
384 /* txn_heap is ordered by transaction size */
386
387 buffer->spillTxns = 0;
388 buffer->spillCount = 0;
389 buffer->spillBytes = 0;
390 buffer->streamTxns = 0;
391 buffer->streamCount = 0;
392 buffer->streamBytes = 0;
393 buffer->memExceededCount = 0;
394 buffer->totalTxns = 0;
395 buffer->totalBytes = 0;
396
398
399 dlist_init(&buffer->toplevel_by_lsn);
401 dclist_init(&buffer->catchange_txns);
402
403 /*
404 * Ensure there's no stale data from prior uses of this slot, in case some
405 * prior exit avoided calling ReorderBufferFree. Failure to do this can
406 * produce duplicated txns, and it's very cheap if there's nothing there.
407 */
409
410 return buffer;
411}
412
413/*
414 * Free a ReorderBuffer
415 */
416void
418{
419 MemoryContext context = rb->context;
420
421 /*
422 * We free separately allocated data by entirely scrapping reorderbuffer's
423 * memory context.
424 */
425 MemoryContextDelete(context);
426
427 /* Free disk space used by unconsumed reorder buffers */
429}
430
431/*
432 * Allocate a new ReorderBufferTXN.
433 */
434static ReorderBufferTXN *
436{
437 ReorderBufferTXN *txn;
438
439 txn = (ReorderBufferTXN *)
440 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
441
442 memset(txn, 0, sizeof(ReorderBufferTXN));
443
444 dlist_init(&txn->changes);
445 dlist_init(&txn->tuplecids);
446 dlist_init(&txn->subtxns);
447
448 /* InvalidCommandId is not zero, so set it explicitly */
451
452 return txn;
453}
454
455/*
456 * Free a ReorderBufferTXN.
457 */
458static void
460{
461 /* clean the lookup cache if we were cached (quite likely) */
462 if (rb->by_txn_last_xid == txn->xid)
463 {
464 rb->by_txn_last_xid = InvalidTransactionId;
465 rb->by_txn_last_txn = NULL;
466 }
467
468 /* free data that's contained */
469
470 if (txn->gid != NULL)
471 {
472 pfree(txn->gid);
473 txn->gid = NULL;
474 }
475
476 if (txn->tuplecid_hash != NULL)
477 {
479 txn->tuplecid_hash = NULL;
480 }
481
482 if (txn->invalidations)
483 {
484 pfree(txn->invalidations);
485 txn->invalidations = NULL;
486 }
487
489 {
492 }
493
494 /* Reset the toast hash */
496
497 /* All changes must be deallocated */
498 Assert(txn->size == 0);
499
500 pfree(txn);
501}
502
503/*
504 * Allocate a ReorderBufferChange.
505 */
508{
509 ReorderBufferChange *change;
510
511 change = (ReorderBufferChange *)
512 MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange));
513
514 memset(change, 0, sizeof(ReorderBufferChange));
515 return change;
516}
517
518/*
519 * Free a ReorderBufferChange and update memory accounting, if requested.
520 */
521void
523 bool upd_mem)
524{
525 /* update memory accounting info */
526 if (upd_mem)
529
530 /* free contained data */
531 switch (change->action)
532 {
537 if (change->data.tp.newtuple)
538 {
540 change->data.tp.newtuple = NULL;
541 }
542
543 if (change->data.tp.oldtuple)
544 {
546 change->data.tp.oldtuple = NULL;
547 }
548 break;
550 if (change->data.msg.prefix != NULL)
551 pfree(change->data.msg.prefix);
552 change->data.msg.prefix = NULL;
553 if (change->data.msg.message != NULL)
554 pfree(change->data.msg.message);
555 change->data.msg.message = NULL;
556 break;
558 if (change->data.inval.invalidations)
559 pfree(change->data.inval.invalidations);
560 change->data.inval.invalidations = NULL;
561 break;
563 if (change->data.snapshot)
564 {
566 change->data.snapshot = NULL;
567 }
568 break;
569 /* no data in addition to the struct itself */
571 if (change->data.truncate.relids != NULL)
572 {
574 change->data.truncate.relids = NULL;
575 }
576 break;
581 break;
582 }
583
584 pfree(change);
585}
586
587/*
588 * Allocate a HeapTuple fitting a tuple of size tuple_len (excluding header
589 * overhead).
590 */
593{
594 HeapTuple tuple;
596
597 alloc_len = tuple_len + SizeofHeapTupleHeader;
598
599 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
601 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
602
603 return tuple;
604}
605
606/*
607 * Free a HeapTuple returned by ReorderBufferAllocTupleBuf().
608 */
609void
611{
612 pfree(tuple);
613}
614
615/*
616 * Allocate an array for relids of truncated relations.
617 *
618 * We use the global memory context (for the whole reorder buffer), because
619 * none of the existing ones seems like a good match (some are SLAB, so we
620 * can't use those, and tup_context is meant for tuple data, not relids). We
621 * could add yet another context, but that seems like overkill - TRUNCATE is
622 * not a particularly common operation, so it does not seem worth it.
623 */
624Oid *
626{
627 Oid *relids;
629
630 alloc_len = sizeof(Oid) * nrelids;
631
632 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
633
634 return relids;
635}
636
637/*
638 * Free an array of relids.
639 */
640void
642{
643 pfree(relids);
644}
645
646/*
647 * Return the ReorderBufferTXN from the given buffer, specified by Xid.
648 * If create is true, and a transaction doesn't already exist, create it
649 * (with the given LSN, and as top transaction if that's specified);
650 * when this happens, is_new is set to true.
651 */
652static ReorderBufferTXN *
654 bool *is_new, XLogRecPtr lsn, bool create_as_top)
655{
656 ReorderBufferTXN *txn;
658 bool found;
659
661
662 /*
663 * Check the one-entry lookup cache first
664 */
665 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
666 rb->by_txn_last_xid == xid)
667 {
668 txn = rb->by_txn_last_txn;
669
670 if (txn != NULL)
671 {
672 /* found it, and it's valid */
673 if (is_new)
674 *is_new = false;
675 return txn;
676 }
677
678 /*
679 * cached as non-existent, and asked not to create? Then nothing else
680 * to do.
681 */
682 if (!create)
683 return NULL;
684 /* otherwise fall through to create it */
685 }
686
687 /*
688 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
689 * create an entry.
690 */
691
692 /* search the lookup table */
694 hash_search(rb->by_txn,
695 &xid,
696 create ? HASH_ENTER : HASH_FIND,
697 &found);
698 if (found)
699 txn = ent->txn;
700 else if (create)
701 {
702 /* initialize the new entry, if creation was requested */
703 Assert(ent != NULL);
705
707 ent->txn->xid = xid;
708 txn = ent->txn;
709 txn->first_lsn = lsn;
710 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
711
712 if (create_as_top)
713 {
714 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
716 }
717 }
718 else
719 txn = NULL; /* not found and not asked to create */
720
721 /* update cache */
722 rb->by_txn_last_xid = xid;
723 rb->by_txn_last_txn = txn;
724
725 if (is_new)
726 *is_new = !found;
727
728 Assert(!create || txn != NULL);
729 return txn;
730}
731
732/*
733 * Record the partial change for the streaming of in-progress transactions. We
734 * can stream only complete changes so if we have a partial change like toast
735 * table insert or speculative insert then we mark such a 'txn' so that it
736 * can't be streamed. We also ensure that if the changes in such a 'txn' can
737 * be streamed and are above logical_decoding_work_mem threshold then we stream
738 * them as soon as we have a complete change.
739 */
740static void
742 ReorderBufferChange *change,
743 bool toast_insert)
744{
745 ReorderBufferTXN *toptxn;
746
747 /*
748 * The partial changes need to be processed only while streaming
749 * in-progress transactions.
750 */
752 return;
753
754 /* Get the top transaction. */
755 toptxn = rbtxn_get_toptxn(txn);
756
757 /*
758 * Indicate a partial change for toast inserts. The change will be
759 * considered as complete once we get the insert or update on the main
760 * table and we are sure that the pending toast chunks are not required
761 * anymore.
762 *
763 * If we allow streaming when there are pending toast chunks then such
764 * chunks won't be released till the insert (multi_insert) is complete and
765 * we expect the txn to have streamed all changes after streaming. This
766 * restriction is mainly to ensure the correctness of streamed
767 * transactions and it doesn't seem worth uplifting such a restriction
768 * just to allow this case because anyway we will stream the transaction
769 * once such an insert is complete.
770 */
771 if (toast_insert)
773 else if (rbtxn_has_partial_change(toptxn) &&
774 IsInsertOrUpdate(change->action) &&
777
778 /*
779 * Indicate a partial change for speculative inserts. The change will be
780 * considered as complete once we get the speculative confirm or abort
781 * token.
782 */
783 if (IsSpecInsert(change->action))
785 else if (rbtxn_has_partial_change(toptxn) &&
788
789 /*
790 * Stream the transaction if it is serialized before and the changes are
791 * now complete in the top-level transaction.
792 *
793 * The reason for doing the streaming of such a transaction as soon as we
794 * get the complete change for it is that previously it would have reached
795 * the memory threshold and wouldn't get streamed because of incomplete
796 * changes. Delaying such transactions would increase apply lag for them.
797 */
799 !(rbtxn_has_partial_change(toptxn)) &&
800 rbtxn_is_serialized(txn) &&
802 ReorderBufferStreamTXN(rb, toptxn);
803}
804
805/*
806 * Queue a change into a transaction so it can be replayed upon commit or will be
807 * streamed when we reach logical_decoding_work_mem threshold.
808 */
809void
811 ReorderBufferChange *change, bool toast_insert)
812{
813 ReorderBufferTXN *txn;
814
815 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
816
817 /*
818 * If we have detected that the transaction is aborted while streaming the
819 * previous changes or by checking its CLOG, there is no point in
820 * collecting further changes for it.
821 */
822 if (rbtxn_is_aborted(txn))
823 {
824 /*
825 * We don't need to update memory accounting for this change as we
826 * have not added it to the queue yet.
827 */
828 ReorderBufferFreeChange(rb, change, false);
829 return;
830 }
831
832 /*
833 * The changes that are sent downstream are considered streamable. We
834 * remember such transactions so that only those will later be considered
835 * for streaming.
836 */
837 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
843 {
844 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
845
847 }
848
849 change->lsn = lsn;
850 change->txn = txn;
851
853 dlist_push_tail(&txn->changes, &change->node);
854 txn->nentries++;
855 txn->nentries_mem++;
856
857 /* update memory accounting information */
860
861 /* process partial change */
863
864 /* check the memory limits and evict something if needed */
866}
867
868/*
869 * A transactional message is queued to be processed upon commit and a
870 * non-transactional message gets processed immediately.
871 */
872void
875 bool transactional, const char *prefix,
876 Size message_size, const char *message)
877{
878 if (transactional)
879 {
880 MemoryContext oldcontext;
881 ReorderBufferChange *change;
882
884
885 /*
886 * We don't expect snapshots for transactional changes - we'll use the
887 * snapshot derived later during apply (unless the change gets
888 * skipped).
889 */
890 Assert(!snap);
891
892 oldcontext = MemoryContextSwitchTo(rb->context);
893
896 change->data.msg.prefix = pstrdup(prefix);
897 change->data.msg.message_size = message_size;
898 change->data.msg.message = palloc(message_size);
899 memcpy(change->data.msg.message, message, message_size);
900
901 ReorderBufferQueueChange(rb, xid, lsn, change, false);
902
903 MemoryContextSwitchTo(oldcontext);
904 }
905 else
906 {
907 ReorderBufferTXN *txn = NULL;
908 volatile Snapshot snapshot_now = snap;
909
910 /* Non-transactional changes require a valid snapshot. */
911 Assert(snapshot_now);
912
913 if (xid != InvalidTransactionId)
914 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
915
916 /* setup snapshot to allow catalog access */
917 SetupHistoricSnapshot(snapshot_now, NULL);
918 PG_TRY();
919 {
920 rb->message(rb, txn, lsn, false, prefix, message_size, message);
921
923 }
924 PG_CATCH();
925 {
927 PG_RE_THROW();
928 }
929 PG_END_TRY();
930 }
931}
932
933/*
934 * AssertTXNLsnOrder
935 * Verify LSN ordering of transaction lists in the reorderbuffer
936 *
937 * Other LSN-related invariants are checked too.
938 *
939 * No-op if assertions are not in use.
940 */
941static void
943{
944#ifdef USE_ASSERT_CHECKING
945 LogicalDecodingContext *ctx = rb->private_data;
946 dlist_iter iter;
949
950 /*
951 * Skip the verification if we don't reach the LSN at which we start
952 * decoding the contents of transactions yet because until we reach the
953 * LSN, we could have transactions that don't have the association between
954 * the top-level transaction and subtransaction yet and consequently have
955 * the same LSN. We don't guarantee this association until we try to
956 * decode the actual contents of transaction. The ordering of the records
957 * prior to the start_decoding_at LSN should have been checked before the
958 * restart.
959 */
961 return;
962
963 dlist_foreach(iter, &rb->toplevel_by_lsn)
964 {
966 iter.cur);
967
968 /* start LSN must be set */
969 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
970
971 /* If there is an end LSN, it must not be lower than the start LSN */
972 if (XLogRecPtrIsValid(cur_txn->end_lsn))
973 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
974
975 /* Current initial LSN must be strictly higher than previous */
978
979 /* known-as-subtxn txns must not be listed */
981
982 prev_first_lsn = cur_txn->first_lsn;
983 }
984
985 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
986 {
988 base_snapshot_node,
989 iter.cur);
990
991 /* base snapshot (and its LSN) must be set */
992 Assert(cur_txn->base_snapshot != NULL);
993 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
994
995 /* current LSN must be strictly higher than previous */
997 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
998
999 /* known-as-subtxn txns must not be listed */
1001
1002 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1003 }
1004#endif
1005}
1006
1007/*
1008 * AssertChangeLsnOrder
1009 *
1010 * Check ordering of changes in the (sub)transaction.
1011 */
1012static void
1014{
1015#ifdef USE_ASSERT_CHECKING
1016 dlist_iter iter;
1018
1019 dlist_foreach(iter, &txn->changes)
1020 {
1022
1024
1027 Assert(txn->first_lsn <= cur_change->lsn);
1028
1029 if (XLogRecPtrIsValid(txn->end_lsn))
1030 Assert(cur_change->lsn <= txn->end_lsn);
1031
1033
1034 prev_lsn = cur_change->lsn;
1035 }
1036#endif
1037}
1038
1039/*
1040 * ReorderBufferGetOldestTXN
1041 * Return oldest transaction in reorderbuffer
1042 */
1045{
1046 ReorderBufferTXN *txn;
1047
1049
1050 if (dlist_is_empty(&rb->toplevel_by_lsn))
1051 return NULL;
1052
1053 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1054
1057 return txn;
1058}
1059
1060/*
1061 * ReorderBufferGetOldestXmin
1062 * Return oldest Xmin in reorderbuffer
1063 *
1064 * Returns oldest possibly running Xid from the point of view of snapshots
1065 * used in the transactions kept by reorderbuffer, or InvalidTransactionId if
1066 * there are none.
1067 *
1068 * Since snapshots are assigned monotonically, this equals the Xmin of the
1069 * base snapshot with minimal base_snapshot_lsn.
1070 */
1073{
1074 ReorderBufferTXN *txn;
1075
1077
1078 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1079 return InvalidTransactionId;
1080
1081 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1082 &rb->txns_by_base_snapshot_lsn);
1083 return txn->base_snapshot->xmin;
1084}
1085
1086void
1088{
1089 rb->current_restart_decoding_lsn = ptr;
1090}
1091
1092/*
1093 * ReorderBufferAssignChild
1094 *
1095 * Make note that we know that subxid is a subtransaction of xid, seen as of
1096 * the given lsn.
1097 */
1098void
1100 TransactionId subxid, XLogRecPtr lsn)
1101{
1102 ReorderBufferTXN *txn;
1104 bool new_top;
1105 bool new_sub;
1106
1107 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1108 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1109
1110 if (!new_sub)
1111 {
1113 {
1114 /* already associated, nothing to do */
1115 return;
1116 }
1117 else
1118 {
1119 /*
1120 * We already saw this transaction, but initially added it to the
1121 * list of top-level txns. Now that we know it's not top-level,
1122 * remove it from there.
1123 */
1124 dlist_delete(&subtxn->node);
1125 }
1126 }
1127
1128 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1129 subtxn->toplevel_xid = xid;
1130 Assert(subtxn->nsubtxns == 0);
1131
1132 /* set the reference to top-level transaction */
1133 subtxn->toptxn = txn;
1134
1135 /* add to subtransaction list */
1136 dlist_push_tail(&txn->subtxns, &subtxn->node);
1137 txn->nsubtxns++;
1138
1139 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1141
1142 /* Verify LSN-ordering invariant */
1144}
1145
1146/*
1147 * ReorderBufferTransferSnapToParent
1148 * Transfer base snapshot from subtxn to top-level txn, if needed
1149 *
1150 * This is done if the top-level txn doesn't have a base snapshot, or if the
1151 * subtxn's base snapshot has an earlier LSN than the top-level txn's base
1152 * snapshot's LSN. This can happen if there are no changes in the toplevel
1153 * txn but there are some in the subtxn, or the first change in subtxn has
1154 * earlier LSN than first change in the top-level txn and we learned about
1155 * their kinship only now.
1156 *
1157 * The subtransaction's snapshot is cleared regardless of the transfer
1158 * happening, since it's not needed anymore in either case.
1159 *
1160 * We do this as soon as we become aware of their kinship, to avoid queueing
1161 * extra snapshots to txns known-as-subtxns -- only top-level txns will
1162 * receive further snapshots.
1163 */
1164static void
1167{
1168 Assert(subtxn->toplevel_xid == txn->xid);
1169
1170 if (subtxn->base_snapshot != NULL)
1171 {
1172 if (txn->base_snapshot == NULL ||
1173 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1174 {
1175 /*
1176 * If the toplevel transaction already has a base snapshot but
1177 * it's newer than the subxact's, purge it.
1178 */
1179 if (txn->base_snapshot != NULL)
1180 {
1183 }
1184
1185 /*
1186 * The snapshot is now the top transaction's; transfer it, and
1187 * adjust the list position of the top transaction in the list by
1188 * moving it to where the subtransaction is.
1189 */
1190 txn->base_snapshot = subtxn->base_snapshot;
1191 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1192 dlist_insert_before(&subtxn->base_snapshot_node,
1193 &txn->base_snapshot_node);
1194
1195 /*
1196 * The subtransaction doesn't have a snapshot anymore (so it
1197 * mustn't be in the list.)
1198 */
1199 subtxn->base_snapshot = NULL;
1200 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1201 dlist_delete(&subtxn->base_snapshot_node);
1202 }
1203 else
1204 {
1205 /* Base snap of toplevel is fine, so subxact's is not needed */
1206 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1207 dlist_delete(&subtxn->base_snapshot_node);
1208 subtxn->base_snapshot = NULL;
1209 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1210 }
1211 }
1212}
1213
1214/*
1215 * Associate a subtransaction with its toplevel transaction at commit
1216 * time. There may be no further changes added after this.
1217 */
1218void
1220 TransactionId subxid, XLogRecPtr commit_lsn,
1221 XLogRecPtr end_lsn)
1222{
1224
1225 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1226 InvalidXLogRecPtr, false);
1227
1228 /*
1229 * No need to do anything if that subtxn didn't contain any changes
1230 */
1231 if (!subtxn)
1232 return;
1233
1234 subtxn->final_lsn = commit_lsn;
1235 subtxn->end_lsn = end_lsn;
1236
1237 /*
1238 * Assign this subxact as a child of the toplevel xact (no-op if already
1239 * done.)
1240 */
1242}
1243
1244
1245/*
1246 * Support for efficiently iterating over a transaction's and its
1247 * subtransactions' changes.
1248 *
1249 * We do this with a k-way merge between transactions/subtransactions. For that
1250 * we model the current heads of the different transactions as a binary heap
1251 * so we easily know which (sub-)transaction has the change with the smallest
1252 * lsn next.
1253 *
1254 * We assume the changes in individual transactions are already sorted by LSN.
1255 */
1256
1257/*
1258 * Binary heap comparison function.
1259 */
1260static int
1262{
1264 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1265 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1266
1267 if (pos_a < pos_b)
1268 return 1;
1269 else if (pos_a == pos_b)
1270 return 0;
1271 return -1;
1272}
1273
1274/*
1275 * Allocate & initialize an iterator which iterates in lsn order over a
1276 * transaction and all its subtransactions.
1277 *
1278 * Note: The iterator state is returned through iter_state parameter rather
1279 * than the function's return value. This is because the state gets cleaned up
1280 * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1281 * back the state even if this function throws an exception.
1282 */
1283static void
1286{
1287 Size nr_txns = 0;
1290 int32 off;
1291
1292 *iter_state = NULL;
1293
1294 /* Check ordering of changes in the toplevel transaction. */
1296
1297 /*
1298 * Calculate the size of our heap: one element for every transaction that
1299 * contains changes. (Besides the transactions already in the reorder
1300 * buffer, we count the one we were directly passed.)
1301 */
1302 if (txn->nentries > 0)
1303 nr_txns++;
1304
1306 {
1308
1310
1311 /* Check ordering of changes in this subtransaction. */
1313
1314 if (cur_txn->nentries > 0)
1315 nr_txns++;
1316 }
1317
1318 /* allocate iteration state */
1320 MemoryContextAllocZero(rb->context,
1322 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1323
1324 state->nr_txns = nr_txns;
1325 dlist_init(&state->old_change);
1326
1327 for (off = 0; off < state->nr_txns; off++)
1328 {
1329 state->entries[off].file.vfd = -1;
1330 state->entries[off].segno = 0;
1331 }
1332
1333 /* allocate heap */
1334 state->heap = binaryheap_allocate(state->nr_txns,
1336 state);
1337
1338 /* Now that the state fields are initialized, it is safe to return it. */
1339 *iter_state = state;
1340
1341 /*
1342 * Now insert items into the binary heap, in an unordered fashion. (We
1343 * will run a heap assembly step at the end; this is more efficient.)
1344 */
1345
1346 off = 0;
1347
1348 /* add toplevel transaction if it contains changes */
1349 if (txn->nentries > 0)
1350 {
1352
1353 if (rbtxn_is_serialized(txn))
1354 {
1355 /* serialize remaining changes */
1357 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1358 &state->entries[off].segno);
1359 }
1360
1362 &txn->changes);
1363
1364 state->entries[off].lsn = cur_change->lsn;
1365 state->entries[off].change = cur_change;
1366 state->entries[off].txn = txn;
1367
1369 }
1370
1371 /* add subtransactions if they contain changes */
1373 {
1375
1377
1378 if (cur_txn->nentries > 0)
1379 {
1381
1383 {
1384 /* serialize remaining changes */
1387 &state->entries[off].file,
1388 &state->entries[off].segno);
1389 }
1391 &cur_txn->changes);
1392
1393 state->entries[off].lsn = cur_change->lsn;
1394 state->entries[off].change = cur_change;
1395 state->entries[off].txn = cur_txn;
1396
1398 }
1399 }
1400
1401 /* assemble a valid binary heap */
1402 binaryheap_build(state->heap);
1403}
1404
1405/*
1406 * Return the next change when iterating over a transaction and its
1407 * subtransactions.
1408 *
1409 * Returns NULL when no further changes exist.
1410 */
1411static ReorderBufferChange *
1413{
1414 ReorderBufferChange *change;
1416 int32 off;
1417
1418 /* nothing there anymore */
1419 if (binaryheap_empty(state->heap))
1420 return NULL;
1421
1422 off = DatumGetInt32(binaryheap_first(state->heap));
1423 entry = &state->entries[off];
1424
1425 /* free memory we might have "leaked" in the previous *Next call */
1426 if (!dlist_is_empty(&state->old_change))
1427 {
1428 change = dlist_container(ReorderBufferChange, node,
1429 dlist_pop_head_node(&state->old_change));
1430 ReorderBufferFreeChange(rb, change, true);
1431 Assert(dlist_is_empty(&state->old_change));
1432 }
1433
1434 change = entry->change;
1435
1436 /*
1437 * update heap with information about which transaction has the next
1438 * relevant change in LSN order
1439 */
1440
1441 /* there are in-memory changes */
1442 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1443 {
1444 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1447
1448 /* txn stays the same */
1449 state->entries[off].lsn = next_change->lsn;
1450 state->entries[off].change = next_change;
1451
1453 return change;
1454 }
1455
1456 /* try to load changes from disk */
1457 if (entry->txn->nentries != entry->txn->nentries_mem)
1458 {
1459 /*
1460 * Ugly: restoring changes will reuse *Change records, thus delete the
1461 * current one from the per-tx list and only free in the next call.
1462 */
1463 dlist_delete(&change->node);
1464 dlist_push_tail(&state->old_change, &change->node);
1465
1466 /*
1467 * Update the total bytes processed by the txn for which we are
1468 * releasing the current set of changes and restoring the new set of
1469 * changes.
1470 */
1471 rb->totalBytes += entry->txn->size;
1472 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1473 &state->entries[off].segno))
1474 {
1475 /* successfully restored changes from disk */
1478 &entry->txn->changes);
1479
1480 elog(DEBUG2, "restored %u/%u changes from disk",
1481 (uint32) entry->txn->nentries_mem,
1482 (uint32) entry->txn->nentries);
1483
1484 Assert(entry->txn->nentries_mem);
1485 /* txn stays the same */
1486 state->entries[off].lsn = next_change->lsn;
1487 state->entries[off].change = next_change;
1489
1490 return change;
1491 }
1492 }
1493
1494 /* ok, no changes there anymore, remove */
1496
1497 return change;
1498}
1499
1500/*
1501 * Deallocate the iterator
1502 */
1503static void
1506{
1507 int32 off;
1508
1509 for (off = 0; off < state->nr_txns; off++)
1510 {
1511 if (state->entries[off].file.vfd != -1)
1512 FileClose(state->entries[off].file.vfd);
1513 }
1514
1515 /* free memory we might have "leaked" in the last *Next call */
1516 if (!dlist_is_empty(&state->old_change))
1517 {
1518 ReorderBufferChange *change;
1519
1520 change = dlist_container(ReorderBufferChange, node,
1521 dlist_pop_head_node(&state->old_change));
1522 ReorderBufferFreeChange(rb, change, true);
1523 Assert(dlist_is_empty(&state->old_change));
1524 }
1525
1526 binaryheap_free(state->heap);
1527 pfree(state);
1528}
1529
1530/*
1531 * Cleanup the contents of a transaction, usually after the transaction
1532 * committed or aborted.
1533 */
1534static void
1536{
1537 bool found;
1538 dlist_mutable_iter iter;
1539 Size mem_freed = 0;
1540
1541 /* cleanup subtransactions & their changes */
1542 dlist_foreach_modify(iter, &txn->subtxns)
1543 {
1545
1547
1548 /*
1549 * Subtransactions are always associated to the toplevel TXN, even if
1550 * they originally were happening inside another subtxn, so we won't
1551 * ever recurse more than one level deep here.
1552 */
1554 Assert(subtxn->nsubtxns == 0);
1555
1557 }
1558
1559 /* cleanup changes in the txn */
1560 dlist_foreach_modify(iter, &txn->changes)
1561 {
1562 ReorderBufferChange *change;
1563
1564 change = dlist_container(ReorderBufferChange, node, iter.cur);
1565
1566 /* Check we're not mixing changes from different transactions. */
1567 Assert(change->txn == txn);
1568
1569 /*
1570 * Instead of updating the memory counter for individual changes, we
1571 * sum up the size of memory to free so we can update the memory
1572 * counter all together below. This saves costs of maintaining the
1573 * max-heap.
1574 */
1576
1577 ReorderBufferFreeChange(rb, change, false);
1578 }
1579
1580 /* Update the memory counter */
1582
1583 /*
1584 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1585 * They are always stored in the toplevel transaction.
1586 */
1587 dlist_foreach_modify(iter, &txn->tuplecids)
1588 {
1589 ReorderBufferChange *change;
1590
1591 change = dlist_container(ReorderBufferChange, node, iter.cur);
1592
1593 /* Check we're not mixing changes from different transactions. */
1594 Assert(change->txn == txn);
1596
1597 ReorderBufferFreeChange(rb, change, true);
1598 }
1599
1600 /*
1601 * Cleanup the base snapshot, if set.
1602 */
1603 if (txn->base_snapshot != NULL)
1604 {
1607 }
1608
1609 /*
1610 * Cleanup the snapshot for the last streamed run.
1611 */
1612 if (txn->snapshot_now != NULL)
1613 {
1616 }
1617
1618 /*
1619 * Remove TXN from its containing lists.
1620 *
1621 * Note: if txn is known as subxact, we are deleting the TXN from its
1622 * parent's list of known subxacts; this leaves the parent's nsubtxns
1623 * count too high, but we don't care. Otherwise, we are deleting the TXN
1624 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1625 * list of catalog modifying transactions as well.
1626 */
1627 dlist_delete(&txn->node);
1629 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1630
1631 /* now remove reference from buffer */
1632 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1633 Assert(found);
1634
1635 /* remove entries spilled to disk */
1636 if (rbtxn_is_serialized(txn))
1638
1639 /* deallocate */
1641}
1642
1643/*
1644 * Discard changes from a transaction (and subtransactions), either after
1645 * streaming, decoding them at PREPARE, or detecting the transaction abort.
1646 * Keep the remaining info - transactions, tuplecids, invalidations and
1647 * snapshots.
1648 *
1649 * We additionally remove tuplecids after decoding the transaction at prepare
1650 * time as we only need to perform invalidation at rollback or commit prepared.
1651 *
1652 * 'txn_prepared' indicates that we have decoded the transaction at prepare
1653 * time.
1654 */
1655static void
1657{
1658 dlist_mutable_iter iter;
1659 Size mem_freed = 0;
1660
1661 /* cleanup subtransactions & their changes */
1662 dlist_foreach_modify(iter, &txn->subtxns)
1663 {
1665
1667
1668 /*
1669 * Subtransactions are always associated to the toplevel TXN, even if
1670 * they originally were happening inside another subtxn, so we won't
1671 * ever recurse more than one level deep here.
1672 */
1674 Assert(subtxn->nsubtxns == 0);
1675
1678 }
1679
1680 /* cleanup changes in the txn */
1681 dlist_foreach_modify(iter, &txn->changes)
1682 {
1683 ReorderBufferChange *change;
1684
1685 change = dlist_container(ReorderBufferChange, node, iter.cur);
1686
1687 /* Check we're not mixing changes from different transactions. */
1688 Assert(change->txn == txn);
1689
1690 /* remove the change from its containing list */
1691 dlist_delete(&change->node);
1692
1693 /*
1694 * Instead of updating the memory counter for individual changes, we
1695 * sum up the size of memory to free so we can update the memory
1696 * counter all together below. This saves costs of maintaining the
1697 * max-heap.
1698 */
1700
1701 ReorderBufferFreeChange(rb, change, false);
1702 }
1703
1704 /* Update the memory counter */
1706
1707 if (txn_prepared)
1708 {
1709 /*
1710 * If this is a prepared txn, cleanup the tuplecids we stored for
1711 * decoding catalog snapshot access. They are always stored in the
1712 * toplevel transaction.
1713 */
1714 dlist_foreach_modify(iter, &txn->tuplecids)
1715 {
1716 ReorderBufferChange *change;
1717
1718 change = dlist_container(ReorderBufferChange, node, iter.cur);
1719
1720 /* Check we're not mixing changes from different transactions. */
1721 Assert(change->txn == txn);
1723
1724 /* Remove the change from its containing list. */
1725 dlist_delete(&change->node);
1726
1727 ReorderBufferFreeChange(rb, change, true);
1728 }
1729 }
1730
1731 /*
1732 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1733 * memory. We could also keep the hash table and update it with new ctid
1734 * values, but this seems simpler and good enough for now.
1735 */
1736 if (txn->tuplecid_hash != NULL)
1737 {
1739 txn->tuplecid_hash = NULL;
1740 }
1741
1742 /* If this txn is serialized then clean the disk space. */
1743 if (rbtxn_is_serialized(txn))
1744 {
1747
1748 /*
1749 * We set this flag to indicate if the transaction is ever serialized.
1750 * We need this to accurately update the stats as otherwise the same
1751 * transaction can be counted as serialized multiple times.
1752 */
1754 }
1755
1756 /* also reset the number of entries in the transaction */
1757 txn->nentries_mem = 0;
1758 txn->nentries = 0;
1759}
1760
1761/*
1762 * Check the transaction status by CLOG lookup and discard all changes if
1763 * the transaction is aborted. The transaction status is cached in
1764 * txn->txn_flags so we can skip future changes and avoid CLOG lookups on the
1765 * next call.
1766 *
1767 * Return true if the transaction is aborted, otherwise return false.
1768 *
1769 * When the 'debug_logical_replication_streaming' is set to "immediate", we
1770 * don't check the transaction status, meaning the caller will always process
1771 * this transaction.
1772 */
1773static bool
1775{
1776 /* Quick return for regression tests */
1778 return false;
1779
1780 /*
1781 * Quick return if the transaction status is already known.
1782 */
1783
1784 if (rbtxn_is_committed(txn))
1785 return false;
1786 if (rbtxn_is_aborted(txn))
1787 {
1788 /* Already-aborted transactions should not have any changes */
1789 Assert(txn->size == 0);
1790
1791 return true;
1792 }
1793
1794 /* Otherwise, check the transaction status using CLOG lookup */
1795
1797 return false;
1798
1799 if (TransactionIdDidCommit(txn->xid))
1800 {
1801 /*
1802 * Remember the transaction is committed so that we can skip CLOG
1803 * check next time, avoiding the pressure on CLOG lookup.
1804 */
1805 Assert(!rbtxn_is_aborted(txn));
1807 return false;
1808 }
1809
1810 /*
1811 * The transaction aborted. We discard both the changes collected so far
1812 * and the toast reconstruction data. The full cleanup will happen as part
1813 * of decoding ABORT record of this transaction.
1814 */
1817
1818 /* All changes should be discarded */
1819 Assert(txn->size == 0);
1820
1821 /*
1822 * Mark the transaction as aborted so we can ignore future changes of this
1823 * transaction.
1824 */
1827
1828 return true;
1829}
1830
1831/*
1832 * Build a hash with a (relfilelocator, ctid) -> (cmin, cmax) mapping for use by
1833 * HeapTupleSatisfiesHistoricMVCC.
1834 */
1835static void
1837{
1838 dlist_iter iter;
1840
1842 return;
1843
1845 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1846 hash_ctl.hcxt = rb->context;
1847
1848 /*
1849 * create the hash with the exact number of to-be-stored tuplecids from
1850 * the start
1851 */
1852 txn->tuplecid_hash =
1853 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1855
1856 dlist_foreach(iter, &txn->tuplecids)
1857 {
1860 bool found;
1861 ReorderBufferChange *change;
1862
1863 change = dlist_container(ReorderBufferChange, node, iter.cur);
1864
1866
1867 /* be careful about padding */
1868 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1869
1870 key.rlocator = change->data.tuplecid.locator;
1871
1873 &key.tid);
1874
1876 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1877 if (!found)
1878 {
1879 ent->cmin = change->data.tuplecid.cmin;
1880 ent->cmax = change->data.tuplecid.cmax;
1881 ent->combocid = change->data.tuplecid.combocid;
1882 }
1883 else
1884 {
1885 /*
1886 * Maybe we already saw this tuple before in this transaction, but
1887 * if so it must have the same cmin.
1888 */
1889 Assert(ent->cmin == change->data.tuplecid.cmin);
1890
1891 /*
1892 * cmax may be initially invalid, but once set it can only grow,
1893 * and never become invalid again.
1894 */
1895 Assert((ent->cmax == InvalidCommandId) ||
1896 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1897 (change->data.tuplecid.cmax > ent->cmax)));
1898 ent->cmax = change->data.tuplecid.cmax;
1899 }
1900 }
1901}
1902
1903/*
1904 * Copy a provided snapshot so we can modify it privately. This is needed so
1905 * that catalog modifying transactions can look into intermediate catalog
1906 * states.
1907 */
1908static Snapshot
1911{
1912 Snapshot snap;
1913 dlist_iter iter;
1914 int i = 0;
1915 Size size;
1916
1917 size = sizeof(SnapshotData) +
1918 sizeof(TransactionId) * orig_snap->xcnt +
1919 sizeof(TransactionId) * (txn->nsubtxns + 1);
1920
1921 snap = MemoryContextAllocZero(rb->context, size);
1922 memcpy(snap, orig_snap, sizeof(SnapshotData));
1923
1924 snap->copied = true;
1925 snap->active_count = 1; /* mark as active so nobody frees it */
1926 snap->regd_count = 0;
1927 snap->xip = (TransactionId *) (snap + 1);
1928
1929 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1930
1931 /*
1932 * snap->subxip contains all txids that belong to our transaction which we
1933 * need to check via cmin/cmax. That's why we store the toplevel
1934 * transaction in there as well.
1935 */
1936 snap->subxip = snap->xip + snap->xcnt;
1937 snap->subxip[i++] = txn->xid;
1938
1939 /*
1940 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1941 * manually. Since it's an upper boundary it is safe to use it for the
1942 * allocation above.
1943 */
1944 snap->subxcnt = 1;
1945
1946 dlist_foreach(iter, &txn->subtxns)
1947 {
1949
1951 snap->subxip[i++] = sub_txn->xid;
1952 snap->subxcnt++;
1953 }
1954
1955 /* sort so we can bsearch() later */
1956 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1957
1958 /* store the specified current CommandId */
1959 snap->curcid = cid;
1960
1961 return snap;
1962}
1963
1964/*
1965 * Free a previously ReorderBufferCopySnap'ed snapshot
1966 */
1967static void
1969{
1970 if (snap->copied)
1971 pfree(snap);
1972 else
1974}
1975
1976/*
1977 * If the transaction was (partially) streamed, we need to prepare or commit
1978 * it in a 'streamed' way. That is, we first stream the remaining part of the
1979 * transaction, and then invoke the stream_prepare or stream_commit callback,
1980 * as appropriate.
1981 */
1982static void
1984{
1985 /* we should only call this for previously streamed transactions */
1987
1989
1990 if (rbtxn_is_prepared(txn))
1991 {
1992 /*
1993 * Note, we send stream prepare even if a concurrent abort is
1994 * detected. See DecodePrepare for more information.
1995 */
1997 rb->stream_prepare(rb, txn, txn->final_lsn);
1999
2000 /*
2001 * This is a PREPARED transaction, part of a two-phase commit. The
2002 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2003 * just truncate txn by removing changes and tuplecids.
2004 */
2005 ReorderBufferTruncateTXN(rb, txn, true);
2006 /* Reset the CheckXidAlive */
2008 }
2009 else
2010 {
2011 rb->stream_commit(rb, txn, txn->final_lsn);
2013 }
2014}
2015
2016/*
2017 * Set xid to detect concurrent aborts.
2018 *
2019 * While streaming an in-progress transaction or decoding a prepared
2020 * transaction there is a possibility that the (sub)transaction might get
2021 * aborted concurrently. In such a case, if the (sub)transaction has a catalog
2022 * update, then we might decode the tuple using the wrong catalog version. For
2023 * example, suppose there is one catalog tuple with (xmin: 500, xmax: 0). Now,
2024 * the transaction 501 updates the catalog tuple and after that we will have
2025 * two tuples (xmin: 500, xmax: 501) and (xmin: 501, xmax: 0). Now, if 501 is
2026 * aborted and some other transaction say 502 updates the same catalog tuple
2027 * then the first tuple will be changed to (xmin: 500, xmax: 502). So, the
2028 * problem is that when we try to decode the tuple inserted/updated in 501
2029 * after the catalog update, we will see the catalog tuple with (xmin: 500,
2030 * xmax: 502) as visible because it will consider that the tuple is deleted by
2031 * xid 502 which is not visible to our snapshot. And when we will try to
2032 * decode with that catalog tuple, it can lead to a wrong result or a crash.
2033 * So, it is necessary to detect concurrent aborts to allow streaming of
2034 * in-progress transactions or decoding of prepared transactions.
2035 *
2036 * For detecting the concurrent abort we set CheckXidAlive to the xid of the
2037 * (sub)transaction to which this change belongs. And, during
2038 * catalog scan we can check the status of the xid and if it is aborted we will
2039 * report a specific error so that we can stop streaming current transaction
2040 * and discard the already streamed changes on such an error. We might have
2041 * already streamed some of the changes for the aborted (sub)transaction, but
2042 * that is fine because when we decode the abort we will stream abort message
2043 * to truncate the changes in the subscriber. Similarly, for prepared
2044 * transactions, we stop decoding if concurrent abort is detected and then
2045 * rollback the changes when rollback prepared is encountered. See
2046 * DecodePrepare.
2047 */
2048static inline void
2050{
2051 /*
2052 * If the input transaction id is already set as a CheckXidAlive then
2053 * nothing to do.
2054 */
2056 return;
2057
2058 /*
2059 * setup CheckXidAlive if it's not committed yet. We don't check if the
2060 * xid is aborted. That will happen during catalog access.
2061 */
2062 if (!TransactionIdDidCommit(xid))
2063 CheckXidAlive = xid;
2064 else
2066}
2067
2068/*
2069 * Helper function for ReorderBufferProcessTXN for applying change.
2070 */
2071static inline void
2073 Relation relation, ReorderBufferChange *change,
2074 bool streaming)
2075{
2076 if (streaming)
2077 rb->stream_change(rb, txn, relation, change);
2078 else
2079 rb->apply_change(rb, txn, relation, change);
2080}
2081
2082/*
2083 * Helper function for ReorderBufferProcessTXN for applying the truncate.
2084 */
2085static inline void
2087 int nrelations, Relation *relations,
2088 ReorderBufferChange *change, bool streaming)
2089{
2090 if (streaming)
2091 rb->stream_truncate(rb, txn, nrelations, relations, change);
2092 else
2093 rb->apply_truncate(rb, txn, nrelations, relations, change);
2094}
2095
2096/*
2097 * Helper function for ReorderBufferProcessTXN for applying the message.
2098 */
2099static inline void
2101 ReorderBufferChange *change, bool streaming)
2102{
2103 if (streaming)
2104 rb->stream_message(rb, txn, change->lsn, true,
2105 change->data.msg.prefix,
2106 change->data.msg.message_size,
2107 change->data.msg.message);
2108 else
2109 rb->message(rb, txn, change->lsn, true,
2110 change->data.msg.prefix,
2111 change->data.msg.message_size,
2112 change->data.msg.message);
2113}
2114
2115/*
2116 * Function to store the command id and snapshot at the end of the current
2117 * stream so that we can reuse the same while sending the next stream.
2118 */
2119static inline void
2121 Snapshot snapshot_now, CommandId command_id)
2122{
2123 txn->command_id = command_id;
2124
2125 /* Avoid copying if it's already copied. */
2126 if (snapshot_now->copied)
2127 txn->snapshot_now = snapshot_now;
2128 else
2129 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2130 txn, command_id);
2131}
2132
2133/*
2134 * Mark the given transaction as streamed if it's a top-level transaction
2135 * or has changes.
2136 */
2137static void
2139{
2140 /*
2141 * The top-level transaction is always marked as streamed, even if it
2142 * does not contain any changes (that is, when all the changes are in
2143 * subtransactions).
2144 *
2145 * For subtransactions, we only mark them as streamed when there are
2146 * changes in them.
2147 *
2148 * We do it this way because of aborts - we don't want to send aborts for
2149 * XIDs the downstream is not aware of. And of course, it always knows
2150 * about the top-level xact (we send the XID in all messages), but we
2151 * never stream XIDs of empty subxacts.
2152 */
2153 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2155}
2156
2157/*
2158 * Helper function for ReorderBufferProcessTXN to handle the concurrent
2159 * abort of the streaming transaction. This resets the TXN such that it
2160 * can be used to stream the remaining data of transaction being processed.
2161 * This can happen when the subtransaction is aborted and we still want to
2162 * continue processing the main or other subtransactions data.
2163 */
2164static void
2166 Snapshot snapshot_now,
2167 CommandId command_id,
2168 XLogRecPtr last_lsn)
2169{
2170 /* Discard the changes that we just streamed */
2172
2173 /* Free all resources allocated for toast reconstruction */
2175
2176 /*
2177 * For the streaming case, stop the stream and remember the command ID and
2178 * snapshot for the streaming run.
2179 */
2180 if (rbtxn_is_streamed(txn))
2181 {
2182 rb->stream_stop(rb, txn, last_lsn);
2183 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2184 }
2185
2186 /* All changes must be deallocated */
2187 Assert(txn->size == 0);
2188}
2189
2190/*
2191 * Helper function for ReorderBufferReplay and ReorderBufferStreamTXN.
2192 *
2193 * Send data of a transaction (and its subtransactions) to the
2194 * output plugin. We iterate over the top and subtransactions (using a k-way
2195 * merge) and replay the changes in lsn order.
2196 *
2197 * If streaming is true then data will be sent using stream API.
2198 *
2199 * Note: "volatile" markers on some parameters are to avoid trouble with
2200 * PG_TRY inside the function.
2201 */
2202static void
2204 XLogRecPtr commit_lsn,
2205 volatile Snapshot snapshot_now,
2206 volatile CommandId command_id,
2207 bool streaming)
2208{
2209 bool using_subtxn;
2215 volatile bool stream_started = false;
2216 ReorderBufferTXN *volatile curtxn = NULL;
2217
2218 /* build data to be able to lookup the CommandIds of catalog tuples */
2220
2221 /* setup the initial snapshot */
2222 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2223
2224 /*
2225 * Decoding needs access to syscaches et al., which in turn use
2226 * heavyweight locks and such. Thus we need to have enough state around to
2227 * keep track of those. The easiest way is to simply use a transaction
2228 * internally. That also allows us to easily enforce that nothing writes
2229 * to the database by checking for xid assignments.
2230 *
2231 * When we're called via the SQL SRF there's already a transaction
2232 * started, so start an explicit subtransaction there.
2233 */
2235
2236 PG_TRY();
2237 {
2238 ReorderBufferChange *change;
2239 int changes_count = 0; /* used to accumulate the number of
2240 * changes */
2241
2242 if (using_subtxn)
2243 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2244 else
2246
2247 /*
2248 * We only need to send begin/begin-prepare for non-streamed
2249 * transactions.
2250 */
2251 if (!streaming)
2252 {
2253 if (rbtxn_is_prepared(txn))
2254 rb->begin_prepare(rb, txn);
2255 else
2256 rb->begin(rb, txn);
2257 }
2258
2260 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2261 {
2262 Relation relation = NULL;
2263 Oid reloid;
2264
2266
2267 /*
2268 * We can't call start stream callback before processing first
2269 * change.
2270 */
2272 {
2273 if (streaming)
2274 {
2275 txn->origin_id = change->origin_id;
2276 rb->stream_start(rb, txn, change->lsn);
2277 stream_started = true;
2278 }
2279 }
2280
2281 /*
2282 * Enforce correct ordering of changes, merged from multiple
2283 * subtransactions. The changes may have the same LSN due to
2284 * MULTI_INSERT xlog records.
2285 */
2287
2288 prev_lsn = change->lsn;
2289
2290 /*
2291 * Set the current xid to detect concurrent aborts. This is
2292 * required for the cases when we decode the changes before the
2293 * COMMIT record is processed.
2294 */
2295 if (streaming || rbtxn_is_prepared(change->txn))
2296 {
2297 curtxn = change->txn;
2299 }
2300
2301 switch (change->action)
2302 {
2304
2305 /*
2306 * Confirmation for speculative insertion arrived. Simply
2307 * use as a normal record. It'll be cleaned up at the end
2308 * of INSERT processing.
2309 */
2310 if (specinsert == NULL)
2311 elog(ERROR, "invalid ordering of speculative insertion changes");
2312 Assert(specinsert->data.tp.oldtuple == NULL);
2313 change = specinsert;
2315
2316 /* intentionally fall through */
2321 Assert(snapshot_now);
2322
2323 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2324 change->data.tp.rlocator.relNumber);
2325
2326 /*
2327 * Mapped catalog tuple without data, emitted while
2328 * catalog table was in the process of being rewritten. We
2329 * can fail to look up the relfilenumber, because the
2330 * relmapper has no "historic" view, in contrast to the
2331 * normal catalog during decoding. Thus repeated rewrites
2332 * can cause a lookup failure. That's OK because we do not
2333 * decode catalog changes anyway. Normally such tuples
2334 * would be skipped over below, but we can't identify
2335 * whether the table should be logically logged without
2336 * mapping the relfilenumber to the oid.
2337 */
2338 if (reloid == InvalidOid &&
2339 change->data.tp.newtuple == NULL &&
2340 change->data.tp.oldtuple == NULL)
2341 goto change_done;
2342 else if (reloid == InvalidOid)
2343 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2344 relpathperm(change->data.tp.rlocator,
2345 MAIN_FORKNUM).str);
2346
2347 relation = RelationIdGetRelation(reloid);
2348
2349 if (!RelationIsValid(relation))
2350 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2351 reloid,
2352 relpathperm(change->data.tp.rlocator,
2353 MAIN_FORKNUM).str);
2354
2355 if (!RelationIsLogicallyLogged(relation))
2356 goto change_done;
2357
2358 /*
2359 * Ignore temporary heaps created during DDL unless the
2360 * plugin has asked for them.
2361 */
2362 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2363 goto change_done;
2364
2365 /*
2366 * For now ignore sequence changes entirely. Most of the
2367 * time they don't log changes using records we
2368 * understand, so it doesn't make sense to handle the few
2369 * cases we do.
2370 */
2371 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2372 goto change_done;
2373
2374 /* user-triggered change */
2375 if (!IsToastRelation(relation))
2376 {
2377 ReorderBufferToastReplace(rb, txn, relation, change);
2378 ReorderBufferApplyChange(rb, txn, relation, change,
2379 streaming);
2380
2381 /*
2382 * Only clear reassembled toast chunks if we're sure
2383 * they're not required anymore. The creator of the
2384 * tuple tells us.
2385 */
2386 if (change->data.tp.clear_toast_afterwards)
2388 }
2389 /* we're not interested in toast deletions */
2390 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2391 {
2392 /*
2393 * Need to reassemble the full toasted Datum in
2394 * memory. To ensure the chunks don't get reused till
2395 * we're done, remove it from the list of this
2396 * transaction's changes. Otherwise it will get
2397 * freed/reused while restoring spooled data from
2398 * disk.
2399 */
2400 Assert(change->data.tp.newtuple != NULL);
2401
2402 dlist_delete(&change->node);
2403 ReorderBufferToastAppendChunk(rb, txn, relation,
2404 change);
2405 }
2406
2408
2409 /*
2410 * If speculative insertion was confirmed, the record
2411 * isn't needed anymore.
2412 */
2413 if (specinsert != NULL)
2414 {
2416 specinsert = NULL;
2417 }
2418
2419 if (RelationIsValid(relation))
2420 {
2421 RelationClose(relation);
2422 relation = NULL;
2423 }
2424 break;
2425
2427
2428 /*
2429 * Speculative insertions are dealt with by delaying the
2430 * processing of the insert until the confirmation record
2431 * arrives. For that we simply unlink the record from the
2432 * chain, so it does not get freed/reused while restoring
2433 * spooled data from disk.
2434 *
2435 * This is safe in the face of concurrent catalog changes
2436 * because the relevant relation can't be changed between
2437 * speculative insertion and confirmation due to
2438 * CheckTableNotInUse() and locking.
2439 */
2440
2441 /* Previous speculative insertion must be aborted */
2443
2444 /* and memorize the pending insertion */
2445 dlist_delete(&change->node);
2446 specinsert = change;
2447 break;
2448
2450
2451 /*
2452 * Abort for speculative insertion arrived. So cleanup the
2453 * specinsert tuple and toast hash.
2454 *
2455 * Note that we get the spec abort change for each toast
2456 * entry but we need to perform the cleanup only the first
2457 * time we get it for the main table.
2458 */
2459 if (specinsert != NULL)
2460 {
2461 /*
2462 * We must clean the toast hash before processing a
2463 * completely new tuple to avoid confusion about the
2464 * previous tuple's toast chunks.
2465 */
2468
2469 /* We don't need this record anymore. */
2471 specinsert = NULL;
2472 }
2473 break;
2474
2476 {
2477 int i;
2478 int nrelids = change->data.truncate.nrelids;
2479 int nrelations = 0;
2480 Relation *relations;
2481
2482 relations = palloc0_array(Relation, nrelids);
2483 for (i = 0; i < nrelids; i++)
2484 {
2485 Oid relid = change->data.truncate.relids[i];
2486 Relation rel;
2487
2488 rel = RelationIdGetRelation(relid);
2489
2490 if (!RelationIsValid(rel))
2491 elog(ERROR, "could not open relation with OID %u", relid);
2492
2493 if (!RelationIsLogicallyLogged(rel))
2494 continue;
2495
2496 relations[nrelations++] = rel;
2497 }
2498
2499 /* Apply the truncate. */
2501 relations, change,
2502 streaming);
2503
2504 for (i = 0; i < nrelations; i++)
2505 RelationClose(relations[i]);
2506
2507 break;
2508 }
2509
2511 ReorderBufferApplyMessage(rb, txn, change, streaming);
2512 break;
2513
2515 /* Execute the invalidation messages locally */
2517 change->data.inval.invalidations);
2518 break;
2519
2521 /* get rid of the old */
2523
2524 if (snapshot_now->copied)
2525 {
2526 ReorderBufferFreeSnap(rb, snapshot_now);
2527 snapshot_now =
2529 txn, command_id);
2530 }
2531
2532 /*
2533 * Restored from disk, need to be careful not to double
2534 * free. We could introduce refcounting for that, but for
2535 * now this seems infrequent enough not to care.
2536 */
2537 else if (change->data.snapshot->copied)
2538 {
2539 snapshot_now =
2541 txn, command_id);
2542 }
2543 else
2544 {
2545 snapshot_now = change->data.snapshot;
2546 }
2547
2548 /* and continue with the new one */
2549 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2550 break;
2551
2554
2555 if (command_id < change->data.command_id)
2556 {
2557 command_id = change->data.command_id;
2558
2559 if (!snapshot_now->copied)
2560 {
2561 /* we don't use the global one anymore */
2562 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2563 txn, command_id);
2564 }
2565
2566 snapshot_now->curcid = command_id;
2567
2569 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2570 }
2571
2572 break;
2573
2575 elog(ERROR, "tuplecid value in changequeue");
2576 break;
2577 }
2578
2579 /*
2580 * It is possible that the data is not sent to downstream for a
2581 * long time either because the output plugin filtered it or there
2582 * is a DDL that generates a lot of data that is not processed by
2583 * the plugin. So, in such cases, the downstream can timeout. To
2584 * avoid that we try to send a keepalive message if required.
2585 * Trying to send a keepalive message after every change has some
2586 * overhead, but testing showed there is no noticeable overhead if
2587 * we do it after every ~100 changes.
2588 */
2589#define CHANGES_THRESHOLD 100
2590
2592 {
2593 rb->update_progress_txn(rb, txn, prev_lsn);
2594 changes_count = 0;
2595 }
2596 }
2597
2598 /* speculative insertion record must be freed by now */
2600
2601 /* clean up the iterator */
2603 iterstate = NULL;
2604
2605 /*
2606 * Update total transaction count and total bytes processed by the
2607 * transaction and its subtransactions. Ensure to not count the
2608 * streamed transaction multiple times.
2609 *
2610 * Note that the statistics computation has to be done after
2611 * ReorderBufferIterTXNFinish as it releases the serialized change
2612 * which we have already accounted in ReorderBufferIterTXNNext.
2613 */
2614 if (!rbtxn_is_streamed(txn))
2615 rb->totalTxns++;
2616
2617 rb->totalBytes += txn->total_size;
2618
2619 /*
2620 * Done with current changes, send the last message for this set of
2621 * changes depending upon streaming mode.
2622 */
2623 if (streaming)
2624 {
2625 if (stream_started)
2626 {
2627 rb->stream_stop(rb, txn, prev_lsn);
2628 stream_started = false;
2629 }
2630 }
2631 else
2632 {
2633 /*
2634 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2635 * regular ones).
2636 */
2637 if (rbtxn_is_prepared(txn))
2638 {
2640 rb->prepare(rb, txn, commit_lsn);
2642 }
2643 else
2644 rb->commit(rb, txn, commit_lsn);
2645 }
2646
2647 /* this is just a sanity check against bad output plugin behaviour */
2649 elog(ERROR, "output plugin used XID %u",
2651
2652 /*
2653 * Remember the command ID and snapshot for the next set of changes in
2654 * streaming mode.
2655 */
2656 if (streaming)
2657 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2658 else if (snapshot_now->copied)
2659 ReorderBufferFreeSnap(rb, snapshot_now);
2660
2661 /* cleanup */
2663
2664 /*
2665 * Aborting the current (sub-)transaction as a whole has the right
2666 * semantics. We want all locks acquired in here to be released, not
2667 * reassigned to the parent, and we do not want any database access
2668 * to have persistent effects.
2669 */
2671
2672 /* make sure there's no cache pollution */
2674 {
2677 }
2678 else
2679 {
2683 }
2684
2685 if (using_subtxn)
2686 {
2689 CurrentResourceOwner = cowner;
2690 }
2691
2692 /*
2693 * We are here due to one of the four reasons: 1. Decoding an
2694 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2695 * prepared txn that was (partially) streamed. 4. Decoding a committed
2696 * txn.
2697 *
2698 * For 1, we allow truncation of txn data by removing the changes
2699 * already streamed but still keeping other things like invalidations,
2700 * snapshot, and tuplecids. For 2 and 3, we indicate
2701 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2702 * data as the entire transaction has been decoded except for commit.
2703 * For 4, as the entire txn has been decoded, we can fully clean up
2704 * the TXN reorder buffer.
2705 */
2706 if (streaming || rbtxn_is_prepared(txn))
2707 {
2708 if (streaming)
2710
2712 /* Reset the CheckXidAlive */
2714 }
2715 else
2717 }
2718 PG_CATCH();
2719 {
2722
2723 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2724 if (iterstate)
2726
2728
2729 /*
2730 * Force cache invalidation to happen outside of a valid transaction
2731 * to prevent catalog access as we just caught an error.
2732 */
2734
2735 /* make sure there's no cache pollution */
2737 {
2740 }
2741 else
2742 {
2746 }
2747
2748 if (using_subtxn)
2749 {
2752 CurrentResourceOwner = cowner;
2753 }
2754
2755 /* Free the specinsert change before freeing the ReorderBufferTXN */
2756 if (specinsert != NULL)
2757 {
2759 specinsert = NULL;
2760 }
2761
2762 /*
2763 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2764 * abort of the (sub)transaction we are streaming or preparing. We
2765 * need to do the cleanup and return gracefully on this error, see
2766 * SetupCheckXidLive.
2767 *
2768 * This error code can be thrown by one of the callbacks we call
2769 * during decoding so we need to ensure that we return gracefully only
2770 * when we are sending the data in streaming mode and the streaming is
2771 * not finished yet or when we are sending the data out on a PREPARE
2772 * during a two-phase commit.
2773 */
2774 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2776 {
2777 /* curtxn must be set for streaming or prepared transactions */
2778 Assert(curtxn);
2779
2780 /* Cleanup the temporary error state. */
2783 errdata = NULL;
2784
2785 /* Remember the transaction is aborted. */
2787 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2788
2789 /* Mark the transaction as streamed if appropriate */
2790 if (stream_started)
2792
2793 /* Reset the TXN so that it is allowed to stream remaining data. */
2794 ReorderBufferResetTXN(rb, txn, snapshot_now,
2795 command_id, prev_lsn);
2796 }
2797 else
2798 {
2801 PG_RE_THROW();
2802 }
2803 }
2804 PG_END_TRY();
2805}
2806
2807/*
2808 * Perform the replay of a transaction and its non-aborted subtransactions.
2809 *
2810 * Subtransactions must already have been processed by
2811 * ReorderBufferCommitChild(), even if they were previously assigned to the
2812 * toplevel transaction with ReorderBufferAssignChild.
2813 *
2814 * This interface is called once a prepare or toplevel commit is read for both
2815 * streamed as well as non-streamed transactions.
 *
 * commit_lsn, end_lsn, commit_time, origin_id and origin_lsn are first stored
 * in the transaction (commit_lsn as final_lsn), so that later steps can read
 * them from there.
 *
 * A (partially) streamed transaction takes the streamed commit path and the
 * function returns.  A transaction without a base snapshot has nothing to
 * decode.  Otherwise the changes are handed to ReorderBufferProcessTXN(),
 * starting from the transaction's base snapshot and FirstCommandId.
2816 */
2817static void
2820 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2821 TimestampTz commit_time,
2822 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2823{
2824 Snapshot snapshot_now;
2825 CommandId command_id = FirstCommandId;
2826
 /* Remember the commit/prepare information in the transaction itself. */
2827 txn->final_lsn = commit_lsn;
2828 txn->end_lsn = end_lsn;
2829 txn->commit_time = commit_time;
2830 txn->origin_id = origin_id;
2831 txn->origin_lsn = origin_lsn;
2832
2833 /*
2834 * If the transaction was (partially) streamed, we need to commit it in a
2835 * 'streamed' way. That is, we first stream the remaining part of the
2836 * transaction, and then invoke stream_commit message.
2837 *
2838 * Called after everything (origin ID, LSN, ...) is stored in the
2839 * transaction to avoid passing that information directly.
2840 */
2841 if (rbtxn_is_streamed(txn))
2842 {
2844 return;
2845 }
2846
2847 /*
2848 * If this transaction has no snapshot, it didn't make any changes to the
2849 * database, so there's nothing to decode. Note that
2850 * ReorderBufferCommitChild will have transferred any snapshots from
2851 * subtransactions if there were any.
2852 */
2853 if (txn->base_snapshot == NULL)
2854 {
2855 Assert(txn->ninvalidations == 0);
2856
2857 /*
2858 * Removing this txn before a commit might result in the computation
2859 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2860 */
2861 if (!rbtxn_is_prepared(txn))
2863 return;
2864 }
2865
2866 snapshot_now = txn->base_snapshot;
2867
2868 /* Process and send the changes to output plugin. */
2869 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2870 command_id, false);
2871}
2872
2873/*
2874 * Commit a transaction.
2875 *
2876 * See comments for ReorderBufferReplay().
 *
 * The transaction is looked up by xid.  If the lookup returns NULL the
 * transaction is unknown here and nothing is replayed; otherwise the commit
 * information is passed on unchanged to ReorderBufferReplay().
2877 */
2878void
2880 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2881 TimestampTz commit_time,
2882 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2883{
2884 ReorderBufferTXN *txn;
2885
2886 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2887 false);
2888
2889 /* unknown transaction, nothing to replay */
2890 if (txn == NULL)
2891 return;
2892
2893 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2894 origin_id, origin_lsn);
2895}
2896
2897/*
2898 * Record the prepare information for a transaction. Also, mark the transaction
2899 * as a prepared transaction.
 *
 * The prepare LSN (stored as final_lsn), end LSN, prepare time and origin
 * information are saved in the transaction.
 *
 * Returns false if the transaction is unknown to the reorder buffer, true
 * otherwise.
2900 */
2901bool
2903 XLogRecPtr prepare_lsn, XLogRecPtr end_lsn,
2904 TimestampTz prepare_time,
2905 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2906{
2907 ReorderBufferTXN *txn;
2908
2909 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2910
2911 /* unknown transaction, nothing to do */
2912 if (txn == NULL)
2913 return false;
2914
2915 /*
2916 * Remember the prepare information to be later used by commit prepared in
2917 * case we skip doing prepare.
2918 */
2919 txn->final_lsn = prepare_lsn;
2920 txn->end_lsn = end_lsn;
2921 txn->prepare_time = prepare_time;
2922 txn->origin_id = origin_id;
2923 txn->origin_lsn = origin_lsn;
2924
2925 /* Mark this transaction as a prepared transaction */
2928
2929 return true;
2930}
2931
2932/*
 * Remember that we have skipped prepare.
 *
 * Does nothing if the transaction is unknown to the reorder buffer.
 */
2933void
2935{
2936 ReorderBufferTXN *txn;
2937
2938 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2939
2940 /* unknown transaction, nothing to do */
2941 if (txn == NULL)
2942 return;
2943
2944 /* txn must have been marked as a prepared transaction */
2947}
2948
2949/*
2950 * Prepare a two-phase transaction.
2951 *
2952 * See comments for ReorderBufferReplay().
 *
 * 'gid' is copied into the transaction with pstrdup().  The replay is driven
 * by the prepare information already stored in the transaction (final_lsn,
 * end_lsn, prepare_time, origin_id and origin_lsn).  Does nothing if the
 * transaction is unknown to the reorder buffer.
2953 */
2954void
2956 char *gid)
2957{
2958 ReorderBufferTXN *txn;
2959
2960 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2961 false);
2962
2963 /* unknown transaction, nothing to replay */
2964 if (txn == NULL)
2965 return;
2966
2967 /*
2968 * txn must have been marked as a prepared transaction and must have
2969 * neither been skipped nor sent a prepare. Also, the prepare info must
2970 * have been updated in it by now.
2971 */
2974
2975 txn->gid = pstrdup(gid);
2976
2977 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2978 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2979
2980 /*
2981 * Send a prepare if not already done so. This might occur if we have
2982 * detected a concurrent abort while replaying the non-streaming
2983 * transaction.
2984 */
2985 if (!rbtxn_sent_prepare(txn))
2986 {
2987 rb->prepare(rb, txn, txn->final_lsn);
2989 }
2990}
2991
2992/*
2993 * This is used to handle COMMIT/ROLLBACK PREPARED.
 *
 * 'is_commit' selects between the commit_prepared and the rollback_prepared
 * callback.  The rollback callback is given the prepare end LSN and prepare
 * time, which are saved in local variables before the transaction's fields
 * are overwritten with the commit/abort information.
 *
 * For a commit whose stored final_lsn is below 'two_phase_at', the
 * transaction is replayed first using the stored prepare information.
 *
 * 'gid' is copied into the transaction with pstrdup().  Does nothing if the
 * transaction is unknown to the reorder buffer.
2994 */
2995void
2997 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2998 XLogRecPtr two_phase_at,
2999 TimestampTz commit_time, ReplOriginId origin_id,
3000 XLogRecPtr origin_lsn, char *gid, bool is_commit)
3001{
3002 ReorderBufferTXN *txn;
3003 XLogRecPtr prepare_end_lsn;
3004 TimestampTz prepare_time;
3005
3006 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3007
3008 /* unknown transaction, nothing to do */
3009 if (txn == NULL)
3010 return;
3011
3012 /*
3013 * By this time the txn has the prepare record information; remember it so
3014 * that it can be used later for rollback.
3015 */
3016 prepare_end_lsn = txn->end_lsn;
3017 prepare_time = txn->prepare_time;
3018
3019 /* add the gid in the txn */
3020 txn->gid = pstrdup(gid);
3021
3022 /*
3023 * It is possible that this transaction is not decoded at prepare time
3024 * either because by that time we didn't have a consistent snapshot, or
3025 * two_phase was not enabled, or it was decoded earlier but we have
3026 * restarted. We only need to send the prepare if it was not decoded
3027 * earlier. We don't need to decode the xact for aborts if it is not done
3028 * already.
3029 */
3030 if ((txn->final_lsn < two_phase_at) && is_commit)
3031 {
3032 /*
3033 * txn must have been marked as a prepared transaction and skipped but
3034 * not sent a prepare. Also, the prepare info must have been updated
3035 * in txn even if we skip prepare.
3036 */
3040
3041 /*
3042 * By this time the txn has the prepare record information and it is
3043 * important to use that so that downstream gets the accurate
3044 * information. If instead, we have passed commit information here
3045 * then downstream can behave as it has already replayed commit
3046 * prepared after the restart.
3047 */
3048 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3049 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3050 }
3051
 /* From here on the transaction carries the commit/abort information. */
3052 txn->final_lsn = commit_lsn;
3053 txn->end_lsn = end_lsn;
3054 txn->commit_time = commit_time;
3055 txn->origin_id = origin_id;
3056 txn->origin_lsn = origin_lsn;
3057
3058 if (is_commit)
3059 rb->commit_prepared(rb, txn, commit_lsn);
3060 else
3061 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3062
3063 /* cleanup: make sure there's no cache pollution */
3065 txn->invalidations);
3067}
3068
3069/*
3070 * Abort a transaction that possibly has previous changes. Needs to be first
3071 * called for subtransactions and then for the toplevel xid.
3072 *
3073 * NB: Transactions handled here must have actively aborted (i.e. have
3074 * produced an abort record). Implicitly aborted transactions are handled via
3075 * ReorderBufferAbortOld(); transactions we're just not interested in, but
3076 * which have committed, are handled in ReorderBufferForget().
3077 *
3078 * This function purges this transaction and its contents from memory and
3079 * disk.
 *
 * 'abort_time' is recorded in the transaction and 'lsn' becomes its
 * final_lsn.  For a streamed transaction the stream_abort callback is
 * invoked with 'lsn'.  Does nothing if the transaction is unknown.
3080 */
3081void
3083 TimestampTz abort_time)
3084{
3085 ReorderBufferTXN *txn;
3086
3087 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3088 false);
3089
3090 /* unknown, nothing to remove */
3091 if (txn == NULL)
3092 return;
3093
3094 txn->abort_time = abort_time;
3095
3096 /* For streamed transactions notify the remote node about the abort. */
3097 if (rbtxn_is_streamed(txn))
3098 {
3099 rb->stream_abort(rb, txn, lsn);
3100
3101 /*
3102 * We might have decoded changes for this transaction that could load
3103 * the cache as per the current transaction's view (consider DDLs that
3104 * happened in this transaction). We don't want the decoding of future
3105 * transactions to use those cache entries, so execute only the inval
3106 * messages in this transaction.
3107 */
3108 if (txn->ninvalidations > 0)
3110 txn->invalidations);
3111 }
3112
3113 /* cosmetic... */
3114 txn->final_lsn = lsn;
3115
3116 /* remove potential on-disk data, and deallocate */
3118}
3119
3120/*
3121 * Abort all transactions that aren't actually running anymore because the
3122 * server restarted.
3123 *
3124 * NB: These really have to be transactions that have aborted due to a server
3125 * crash/immediate restart, as we don't deal with invalidations here.
 *
 * Transactions in rb->toplevel_by_lsn whose xid precedes oldestRunningXid
 * are treated as aborted.  Streamed ones are additionally reported through
 * the stream_abort callback, with InvalidXLogRecPtr as the LSN.
3126 */
3127void
3129{
3131
3132 /*
3133 * Iterate through all (potential) toplevel TXNs and abort all that are
3134 * older than what possibly can be running. Once we've found the first
3135 * that is alive we stop, there might be some that acquired an xid earlier
3136 * but started writing later, but it's unlikely and they will be cleaned
3137 * up in a later call to this function.
3138 */
3139 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3140 {
3141 ReorderBufferTXN *txn;
3142
3143 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3144
3145 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3146 {
3147 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3148
3149 /* Notify the remote node about the crash/immediate restart. */
3150 if (rbtxn_is_streamed(txn))
3151 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3152
3153 /* remove potential on-disk data, and deallocate this tx */
3155 }
3156 else
 /* first transaction that may still be running: stop scanning */
3157 return;
3158 }
3159}
3160
3161/*
3162 * Forget the contents of a transaction if we aren't interested in its
3163 * contents. Needs to be first called for subtransactions and then for the
3164 * toplevel xid.
3165 *
3166 * This is significantly different from ReorderBufferAbort() because
3167 * transactions that have committed need to be treated differently from aborted
3168 * ones since they may have modified the catalog.
3169 *
3170 * Note that this is only allowed to be called at the moment a transaction
3171 * commit has just been read, not earlier; otherwise later records referring
3172 * to this xid might re-create the transaction incompletely.
 *
 * 'lsn' is stored as the transaction's final_lsn.  Does nothing if the
 * transaction is unknown to the reorder buffer.
3173 */
3174void
3176{
3177 ReorderBufferTXN *txn;
3178
3179 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3180 false);
3181
3182 /* unknown, nothing to forget */
3183 if (txn == NULL)
3184 return;
3185
3186 /* this transaction mustn't be streamed */
3188
3189 /* cosmetic... */
3190 txn->final_lsn = lsn;
3191
3192 /*
3193 * Process only cache invalidation messages in this transaction if there
3194 * are any. Even if we're not interested in the transaction's contents, it
3195 * could have manipulated the catalog and we need to update the caches
3196 * according to that.
3197 */
3198 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3200 txn->invalidations);
3201 else
3202 Assert(txn->ninvalidations == 0);
3203
3204 /* remove potential on-disk data, and deallocate */
3206}
3207
3208/*
3209 * Invalidate cache for those transactions that need to be skipped just in case
3210 * catalogs were manipulated as part of the transaction.
3211 *
3212 * Note that this is a special-purpose function for prepared transactions where
3213 * we don't want to clean up the TXN even when we decide to skip it. See
3214 * DecodePrepare.
 *
 * Does nothing if the transaction is unknown to the reorder buffer.
3215 */
3216void
3218{
3219 ReorderBufferTXN *txn;
3220
3221 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3222 false);
3223
3224 /* unknown, nothing to do */
3225 if (txn == NULL)
3226 return;
3227
3228 /*
3229 * Process cache invalidation messages if there are any. Even if we're not
3230 * interested in the transaction's contents, it could have manipulated the
3231 * catalog and we need to update the caches according to that.
3232 */
3233 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3235 txn->invalidations);
3236 else
3237 Assert(txn->ninvalidations == 0);
3238}
3239
3240
3241/*
3242 * Execute invalidations happening outside the context of a decoded
3243 * transaction. That currently happens either for xid-less commits
3244 * (cf. RecordTransactionCommit()) or for invalidations in uninteresting
3245 * transactions (via ReorderBufferForget()).
 *
 * Each of the 'ninvalidations' messages in 'invalidations' is passed, in
 * array order, to LocalExecuteInvalidationMessage().  When a subtransaction
 * is used (use_subtxn), CurrentResourceOwner is set back to cowner at the
 * end.
3246 */
3247void
3249 SharedInvalidationMessage *invalidations)
3250{
3254 int i;
3255
3256 if (use_subtxn)
3258
3259 /*
3260 * Force invalidations to happen outside of a valid transaction - that way
3261 * entries will just be marked as invalid without accessing the catalog.
3262 * That's advantageous because we don't need to set up the full state
3263 * necessary for catalog access.
3264 */
3265 if (use_subtxn)
3267
3268 for (i = 0; i < ninvalidations; i++)
3269 LocalExecuteInvalidationMessage(&invalidations[i]);
3270
3271 if (use_subtxn)
3272 {
3275 CurrentResourceOwner = cowner;
3276 }
3277}
3278
3279/*
3280 * Tell reorderbuffer about an xid seen in the WAL stream. Has to be called at
3281 * least once for every xid in XLogRecord->xl_xid (other places in records
3282 * may, but do not have to be passed through here).
3283 *
3284 * Reorderbuffer keeps some data structures about transactions in LSN order,
3285 * for efficiency. To do that it has to know when transactions are first seen
3286 * in the WAL. As many types of records are not actually interesting for
3287 * logical decoding, they do not necessarily pass through here.
 *
 * An xid equal to InvalidTransactionId is ignored.
3288 */
3289void
3291{
3292 /* many records won't have an xid assigned, centralize check here */
3293 if (xid != InvalidTransactionId)
3294 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3295}
3296
3297/*
3298 * Add a new snapshot to this transaction that may only be used after lsn 'lsn'
3299 * because the previous snapshot doesn't describe the catalog correctly for
3300 * following rows.
 *
 * The snapshot is stored in a change which is then queued for 'xid' at 'lsn'
 * via ReorderBufferQueueChange().
3301 */
3302void
3305{
3307
3308 change->data.snapshot = snap;
3310
3311 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3312}
3313
3314/*
3315 * Set up the transaction's base snapshot.
3316 *
3317 * If we know that xid is a subtransaction, set the base snapshot on the
3318 * top-level transaction instead.
 *
 * 'snap' must not be NULL, and the target transaction must not already have
 * a base snapshot (both are asserted).  Besides storing the snapshot and its
 * LSN, the transaction is appended to rb->txns_by_base_snapshot_lsn.
3319 */
3320void
3323{
3324 ReorderBufferTXN *txn;
3325 bool is_new;
3326
3327 Assert(snap != NULL);
3328
3329 /*
3330 * Fetch the transaction to operate on. If we know it's a subtransaction,
3331 * operate on its top-level transaction instead.
3332 */
3333 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3334 if (rbtxn_is_known_subxact(txn))
3335 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3336 NULL, InvalidXLogRecPtr, false);
3337 Assert(txn->base_snapshot == NULL);
3338
3339 txn->base_snapshot = snap;
3340 txn->base_snapshot_lsn = lsn;
3341 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3342
3344}
3345
3346/*
3347 * Access the catalog with this CommandId at this point in the changestream.
3348 *
3349 * May only be called for command ids > 1
 *
 * The command id is stored in a change which is then queued for 'xid' at
 * 'lsn' via ReorderBufferQueueChange().
3350 */
3351void
3354{
3356
3357 change->data.command_id = cid;
3359
3360 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3361}
3362
3363/*
3364 * Update memory counters to account for the new or removed change.
3365 *
3366 * We update two counters - in the reorder buffer, and in the transaction
3367 * containing the change. The reorder buffer counter allows us to quickly
3368 * decide if we reached the memory limit, the transaction counter allows
3369 * us to quickly pick the largest transaction for eviction.
3370 *
3371 * At least one of txn and change must be non-NULL. We update the memory
3372 * counter of txn if it's non-NULL, otherwise change->txn.
3373 *
3374 * When streaming is enabled, we need to update the toplevel transaction
3375 * counters instead - we don't really care about subtransactions as we
3376 * can't stream them individually anyway, and we only pick toplevel
3377 * transactions for eviction. So only toplevel transactions matter.
 *
 * 'addition' selects whether 'sz' is added to or subtracted from the
 * counters; a 'sz' of zero is a no-op.  The transaction is kept in
 * rb->txn_heap only while its size is non-zero, and is re-inserted after
 * every size change.
3378 */
3379static void
3381 ReorderBufferChange *change,
3382 ReorderBufferTXN *txn,
3383 bool addition, Size sz)
3384{
3385 ReorderBufferTXN *toptxn;
3386
3387 Assert(txn || change);
3388
3389 /*
3390 * Ignore tuple CID changes, because those are not evicted when reaching
3391 * memory limit. So we just don't count them, because it might easily
3392 * trigger a pointless attempt to spill.
3393 */
3394 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3395 return;
3396
3397 if (sz == 0)
3398 return;
3399
3400 if (txn == NULL)
3401 txn = change->txn;
3402 Assert(txn != NULL);
3403
3404 /*
3405 * Update the total size in top level as well. This is later used to
3406 * compute the decoding stats.
3407 */
3408 toptxn = rbtxn_get_toptxn(txn);
3409
3410 if (addition)
3411 {
3412 Size oldsize = txn->size;
3413
3414 txn->size += sz;
3415 rb->size += sz;
3416
3417 /* Update the total size in the top transaction. */
3418 toptxn->total_size += sz;
3419
3420 /* Update the max-heap: txn is already in the heap iff it was non-empty */
3421 if (oldsize != 0)
3422 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3423 pairingheap_add(rb->txn_heap, &txn->txn_node);
3424 }
3425 else
3426 {
3427 Assert((rb->size >= sz) && (txn->size >= sz));
3428 txn->size -= sz;
3429 rb->size -= sz;
3430
3431 /* Update the total size in the top transaction. */
3432 toptxn->total_size -= sz;
3433
3434 /* Update the max-heap: re-add txn only if it still holds data */
3435 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3436 if (txn->size != 0)
3437 pairingheap_add(rb->txn_heap, &txn->txn_node);
3438 }
3439
3440 Assert(txn->size <= rb->size);
3441}
3442
3443/*
3444 * Add new (relfilelocator, tid) -> (cmin, cmax) mappings.
3445 *
3446 * We do not include this change type in memory accounting, because we
3447 * keep CIDs in a separate list and do not evict them when reaching
3448 * the memory limit.
 *
 * The mapping, together with 'combocid', 'lsn' and the owning transaction,
 * is stored in a change that is appended to txn->tuplecids;
 * txn->ntuplecids is incremented accordingly.
3449 */
3450void
3452 XLogRecPtr lsn, RelFileLocator locator,
3453 ItemPointerData tid, CommandId cmin,
3454 CommandId cmax, CommandId combocid)
3455{
3457 ReorderBufferTXN *txn;
3458
3459 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3460
3461 change->data.tuplecid.locator = locator;
3462 change->data.tuplecid.tid = tid;
3463 change->data.tuplecid.cmin = cmin;
3464 change->data.tuplecid.cmax = cmax;
3465 change->data.tuplecid.combocid = combocid;
3466 change->lsn = lsn;
3467 change->txn = txn;
3469
3470 dlist_push_tail(&txn->tuplecids, &change->node);
3471 txn->ntuplecids++;
3472}
3473
3474/*
3475 * Add new invalidation messages to the reorder buffer queue.
 *
 * A change is allocated, the 'nmsgs' messages from 'msgs' are copied into it
 * (the change keeps its own copy rather than the caller's array), and the
 * change is queued for 'xid' at 'lsn' via ReorderBufferQueueChange().
3476 */
3477static void
3479 XLogRecPtr lsn, Size nmsgs,
3481{
3482 ReorderBufferChange *change;
3483
3484 change = ReorderBufferAllocChange(rb);
3486 change->data.inval.ninvalidations = nmsgs;
3488 memcpy(change->data.inval.invalidations, msgs,
3489 sizeof(SharedInvalidationMessage) * nmsgs);
3490
3491 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3492}
3493
3494/*
3495 * A helper function for ReorderBufferAddInvalidations() and
3496 * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
3497 * messages into the array at *invals_out.
 *
 * Two cases are distinguished: *ninvals_out is zero (nothing accumulated
 * yet), or an array already exists and is enlarged, sized from
 * *ninvals_out + nmsgs_new.
3498 */
3499static void
3504{
3505 if (*ninvals_out == 0)
3506 {
3510 }
3511 else
3512 {
3513 /* Enlarge the array of inval messages */
3514 *invals_out =
3516 (*ninvals_out + nmsgs_new));
3520 }
3521}
3522
3523/*
3524 * Accumulate the invalidations for executing them later.
3525 *
3526 * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
3527 * accumulates all the invalidation messages in the toplevel transaction, if
3528 * available, otherwise in the current transaction, as well as in the form of
3529 * a change in the reorder buffer. We need to record it in the form of a change
3530 * so that we can execute only the required invalidations instead of executing
3531 * all the invalidations on each CommandId increment. We also need to
3532 * accumulate these in the txn buffer because in some cases where we skip
3533 * processing the transaction (see ReorderBufferForget), we need to execute
3534 * all the invalidations together.
 *
 * 'nmsgs' must be greater than zero (asserted).  The work is done with
 * rb->context as the current memory context; the previous context is
 * restored before returning.
3535 */
3536void
3538 XLogRecPtr lsn, Size nmsgs,
3540{
3541 ReorderBufferTXN *txn;
3542 MemoryContext oldcontext;
3543
3544 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3545
3546 oldcontext = MemoryContextSwitchTo(rb->context);
3547
3548 /*
3549 * Collect all the invalidations under the top transaction, if available,
3550 * so that we can execute them all together. See comments atop this
3551 * function.
3552 */
3553 txn = rbtxn_get_toptxn(txn);
3554
3555 Assert(nmsgs > 0);
3556
3558 &txn->ninvalidations,
3559 msgs, nmsgs);
3560
 /* The change is queued under the original xid, not the top-level one. */
3561 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3562
3563 MemoryContextSwitchTo(oldcontext);
3564}
3565
3566/*
3567 * Accumulate the invalidations distributed by other committed transactions
3568 * for executing them later.
3569 *
3570 * This function is similar to ReorderBufferAddInvalidations() but stores
3571 * the given inval messages to the txn->invalidations_distributed with the
3572 * overflow check.
3573 *
3574 * This needs to be called by committed transactions to distribute their
3575 * inval messages to in-progress transactions.
 *
 * 'nmsgs' must be greater than zero (asserted).  Regardless of how the
 * overflow check turns out, the messages are always queued as a change for
 * 'xid' via ReorderBufferQueueInvalidations().  The work is done with
 * rb->context as the current memory context; the previous context is
 * restored before returning.
3576 */
3577void
3579 XLogRecPtr lsn, Size nmsgs,
3581{
3582 ReorderBufferTXN *txn;
3583 MemoryContext oldcontext;
3584
3585 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3586
3587 oldcontext = MemoryContextSwitchTo(rb->context);
3588
3589 /*
3590 * Collect all the invalidations under the top transaction, if available,
3591 * so that we can execute them all together. See comments atop
3592 * ReorderBufferAddInvalidations.
3593 */
3594 txn = rbtxn_get_toptxn(txn);
3595
3596 Assert(nmsgs > 0);
3597
3599 {
3600 /*
3601 * Check the transaction has enough space for storing distributed
3602 * invalidation messages.
3603 */
3605 {
3606 /*
3607 * Mark the invalidation message as overflowed and free up the
3608 * messages accumulated so far.
3609 */
3611
3613 {
3617 }
3618 }
3619 else
3622 msgs, nmsgs);
3623 }
3624
3625 /* Queue the invalidation messages into the transaction */
3626 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3627
3628 MemoryContextSwitchTo(oldcontext);
3629}
3630
3631/*
3632 * Apply all invalidations we know. Possibly we only need parts at this point
3633 * in the changestream but we don't know which those are.
 *
 * Loops once over the 'nmsgs' messages, in index order.
3634 */
3635static void
3637{
3638 int i;
3639
3640 for (i = 0; i < nmsgs; i++)
3642}
3643
3644/*
3645 * Mark a transaction as containing catalog changes
 *
 * The transaction is looked up by xid.  If it is not yet known to have
 * catalog changes, it is appended to rb->catchange_txns.  When the
 * transaction is a subtransaction, the same is done for its top-level
 * transaction.
 *
 * NOTE(review): the line carrying the function name and leading parameters
 * (3648) and the first statement of each "not yet marked" branch (3657,
 * 3673) are missing from this listing.
3646 */
3647void
3649 XLogRecPtr lsn)
3650{
3651 ReorderBufferTXN *txn;
3652
3653 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3654
3655 if (!rbtxn_has_catalog_changes(txn))
3656 {
3658 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3659 }
3660
3661 /*
3662 * Mark top-level transaction as having catalog changes too if one of its
3663 * children has so that the ReorderBufferBuildTupleCidHash can
3664 * conveniently check just top-level transaction and decide whether to
3665 * build the hash table or not.
3666 */
3667 if (rbtxn_is_subtxn(txn))
3668 {
3669 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3670
3671 if (!rbtxn_has_catalog_changes(toptxn))
3672 {
3674 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3675 }
3676 }
3677}
3678
3679/*
3680 * Return palloc'ed array of the transactions that have changed catalogs.
3681 * The returned array is sorted in xidComparator order.
3682 *
3683 * The caller must free the returned array when done with it.
 *
 * Returns NULL, without allocating anything, when rb->catchange_txns is
 * empty.  Otherwise the array holds one xid per entry of that list, i.e.
 * dclist_count(&rb->catchange_txns) elements.
 *
 * NOTE(review): the return type and function name lines (3685-3686) and
 * the lines that declare and check the per-entry transaction (3700, 3704)
 * are missing from this listing.
3684 */
3687{
3688 dlist_iter iter;
3689 TransactionId *xids = NULL;
3690 size_t xcnt = 0;
3691
3692 /* Quick return if the list is empty */
3693 if (dclist_count(&rb->catchange_txns) == 0)
3694 return NULL;
3695
3696 /* Initialize XID array */
3697 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3698 dclist_foreach(iter, &rb->catchange_txns)
3699 {
3701 catchange_node,
3702 iter.cur);
3703
3705
3706 xids[xcnt++] = txn->xid;
3707 }
3708
3709 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3710
 /* every list entry must have contributed exactly one xid */
3711 Assert(xcnt == dclist_count(&rb->catchange_txns));
3712 return xids;
3713}
3714
3715/*
3716 * Query whether a transaction is already *known* to contain catalog
3717 * changes. This can be wrong until directly before the commit!
 *
 * Returns false when the lookup by xid finds no transaction; otherwise
 * returns the result of rbtxn_has_catalog_changes() for it.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3720) is missing from this listing.
3718 */
3719bool
3721{
3722 ReorderBufferTXN *txn;
3723
3724 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3725 false);
3726 if (txn == NULL)
3727 return false;
3728
3729 return rbtxn_has_catalog_changes(txn);
3730}
3731
3732/*
3733 * ReorderBufferXidHasBaseSnapshot
3734 * Have we already set the base snapshot for the given txn/subtxn?
 *
 * Returns false when xid is not known to the reorder buffer.  For a known
 * subtransaction the answer is taken from the transaction found under its
 * toplevel_xid.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3737) is missing from this listing.
3735 */
3736bool
3738{
3739 ReorderBufferTXN *txn;
3740
3741 txn = ReorderBufferTXNByXid(rb, xid, false,
3742 NULL, InvalidXLogRecPtr, false);
3743
3744 /* transaction isn't known yet, ergo no snapshot */
3745 if (txn == NULL)
3746 return false;
3747
3748 /* a known subtxn? operate on top-level txn instead */
 /*
 * NOTE(review): the result of this second lookup is dereferenced below
 * without a NULL check; presumably a known subxact always has a top-level
 * entry - confirm.
 */
3749 if (rbtxn_is_known_subxact(txn))
3750 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3751 NULL, InvalidXLogRecPtr, false);
3752
3753 return txn->base_snapshot != NULL;
3754}
3755
3756
3757/*
3758 * ---------------------------------------
3759 * Disk serialization support
3760 * ---------------------------------------
3761 */
3762
3763/*
3764 * Ensure the IO buffer is >= sz.
 *
 * The buffer (rb->outbuf, with its size tracked in rb->outbufsize) is
 * allocated in rb->context on first use and grown with repalloc() when it
 * is too small; it is never shrunk here.  Because repalloc() may move the
 * buffer, callers must re-read rb->outbuf after calling this.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3767) is missing from this listing.
3765 */
3766static void
3768{
3769 if (!rb->outbufsize)
3770 {
3771 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3772 rb->outbufsize = sz;
3773 }
3774 else if (rb->outbufsize < sz)
3775 {
3776 rb->outbuf = repalloc(rb->outbuf, sz);
3777 rb->outbufsize = sz;
3778 }
3779}
3780
3781
3782/*
 * Compare two transactions by size
 *
 * Returns -1, 1 or 0 when the size of ta is respectively smaller than,
 * larger than or equal to the size of tb.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3784) and the declarations of ta and tb (3786-3787) are missing from
 * this listing.
 */
3783static int
3785{
3788
3789 if (ta->size < tb->size)
3790 return -1;
3791 if (ta->size > tb->size)
3792 return 1;
3793 return 0;
3794}
3795
3796/*
3797 * Find the largest transaction (toplevel or subxact) to evict (spill to disk).
 *
 * The candidate is the first element of rb->txn_heap.  It is asserted to
 * exist, to have a non-zero size, and to be no larger than rb->size.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3800), the declaration of "largest" (3802) and the first half of the
 * statement that assigns it (3805) are missing from this listing.
3798 */
3799static ReorderBufferTXN *
3801{
3803
3804 /* Get the largest transaction from the max-heap */
3806 pairingheap_first(rb->txn_heap));
3807
3808 Assert(largest);
3809 Assert(largest->size > 0);
3810 Assert(largest->size <= rb->size);
3811
3812 return largest;
3813}
3814
3815/*
3816 * Find the largest streamable (and non-aborted) toplevel transaction to evict
3817 * (by streaming).
3818 *
3819 * This can be seen as an optimized version of ReorderBufferLargestTXN, which
3820 * should give us the same transaction (because we don't update memory account
3821 * for subtransaction with streaming, so it's always 0). But we can simply
3822 * iterate over the limited number of toplevel transactions that have a base
3823 * snapshot. There is no use of selecting a transaction that doesn't have base
3824 * snapshot because we don't decode such transactions. Also, we do not select
3825 * the transaction which doesn't have any streamable change.
3826 *
3827 * Note that, we skip transactions that contain incomplete changes. There
3828 * is a scope of optimization here such that we can select the largest
3829 * transaction which has incomplete changes. But that will make the code and
3830 * design quite complex and that might not be worth the benefit. If we plan to
3831 * stream the transactions that contain incomplete changes then we need to
3832 * find a way to partially stream/truncate the transaction changes in-memory
3833 * and build a mechanism to partially truncate the spilled files.
3834 * Additionally, whenever we partially stream the transaction we need to
3835 * maintain the last streamed lsn and next time we need to restore from that
3836 * segment and the offset in WAL. As we stream the changes from the top
3837 * transaction and restore them subtransaction wise, we need to even remember
3838 * the subxact from where we streamed the last change.
 *
 * Candidates are compared by total_size, and only transactions with a
 * total_size greater than zero are considered.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3841), the declaration of "largest" (3845, presumably initialised to
 * NULL - confirm), the subtxn assertion (3855) and one of the skip
 * conditions (3861) are missing from this listing.
3839 */
3840static ReorderBufferTXN *
3842{
3843 dlist_iter iter;
3844 Size largest_size = 0;
3846
3847 /* Find the largest top-level transaction having a base snapshot. */
3848 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3849 {
3850 ReorderBufferTXN *txn;
3851
3852 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3853
3854 /* must not be a subtxn */
3856 /* base_snapshot must be set */
3857 Assert(txn->base_snapshot != NULL);
3858
3859 /* Don't consider these kinds of transactions for eviction. */
3860 if (rbtxn_has_partial_change(txn) ||
3862 rbtxn_is_aborted(txn))
3863 continue;
3864
3865 /*
 * Find the largest of the eviction candidates.  The comparison is
 * strict, so among equally sized candidates the one encountered first
 * in list order is kept.
 */
3866 if ((largest == NULL || txn->total_size > largest_size) &&
3867 (txn->total_size > 0))
3868 {
3869 largest = txn;
3870 largest_size = txn->total_size;
3871 }
3872 }
3873
3874 return largest;
3875}
3876
3877/*
3878 * Check whether the logical_decoding_work_mem limit was reached, and if yes
3879 * pick the largest (sub)transaction at-a-time to evict and spill its changes to
3880 * disk or send to the output plugin until we reach under the memory limit.
3881 *
3882 * If debug_logical_replication_streaming is set to "immediate", stream or
3883 * serialize the changes immediately.
3884 *
3885 * XXX At this point we select the transactions until we reach under the memory
3886 * limit, but we might also adapt a more elaborate eviction strategy - for example
3887 * evicting enough transactions to free certain fraction (e.g. 50%) of the memory
3888 * limit.
 *
 * The limit is compared in bytes: logical_decoding_work_mem is multiplied
 * by 1024 before being compared against rb->size.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3891), the "else if" condition (3906), part of the loop condition
 * (3926), the selection/eviction calls and aborted checks inside the loop
 * (3933-3934, 3942, 3945, 3953, 3961, 3964) and the statistics update
 * (3982) are missing from this listing.
3889 */
3890static void
3892{
3893 ReorderBufferTXN *txn;
3894 bool update_stats = true;
3895
3896 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3897 {
3898 /*
3899 * Update the statistics as the memory usage has reached the limit. We
3900 * report the statistics update later in this function since we can
3901 * update the slot statistics altogether while streaming or
3902 * serializing transactions in most cases.
3903 */
3904 rb->memExceededCount += 1;
3905 }
3907 {
3908 /*
3909 * Bail out if debug_logical_replication_streaming is buffered and we
3910 * haven't exceeded the memory limit.
3911 */
3912 return;
3913 }
3914
3915 /*
3916 * If debug_logical_replication_streaming is immediate, loop until there's
3917 * no change. Otherwise, loop until we reach under the memory limit. One
3918 * might think that just by evicting the largest (sub)transaction we will
3919 * come under the memory limit based on assumption that the selected
3920 * transaction is at least as large as the most recent change (which
3921 * caused us to go over the memory limit). However, that is not true
3922 * because a user can reduce the logical_decoding_work_mem to a smaller
3923 * value before the most recent change.
3924 */
3925 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3927 rb->size > 0))
3928 {
3929 /*
3930 * Pick the largest non-aborted transaction and evict it from memory
3931 * by streaming, if possible. Otherwise, spill to disk.
3932 */
3935 {
3936 /* we know there has to be one, because the size is not zero */
3937 Assert(txn && rbtxn_is_toptxn(txn));
3938 Assert(txn->total_size > 0);
3939 Assert(rb->size >= txn->total_size);
3940
3941 /* skip the transaction if aborted */
3943 continue;
3944
3946 }
3947 else
3948 {
3949 /*
3950 * Pick the largest transaction (or subtransaction) and evict it
3951 * from memory by serializing it to disk.
3952 */
3954
3955 /* we know there has to be one, because the size is not zero */
3956 Assert(txn);
3957 Assert(txn->size > 0);
3958 Assert(rb->size >= txn->size);
3959
3960 /* skip the transaction if aborted */
3962 continue;
3963
3965 }
3966
3967 /*
3968 * After eviction, the transaction should have no entries in memory,
3969 * and should use 0 bytes for changes.
3970 */
3971 Assert(txn->size == 0);
3972 Assert(txn->nentries_mem == 0);
3973
3974 /*
3975 * We've reported the memExceededCount update while streaming or
3976 * serializing the transaction.
 *
 * Iterations that take one of the "continue" paths above do not reach
 * this point and therefore leave update_stats untouched.
3977 */
3978 update_stats = false;
3979 }
3980
 /* nothing was evicted above, so the statistics still need reporting */
3981 if (update_stats)
3983
3984 /* We must be under the memory limit now. */
3985 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3986}
3987
3988/*
3989 * Spill data of a large transaction (and its subtransactions) to disk.
 *
 * Each in-memory change is written with ReorderBufferSerializeChange(),
 * unlinked from the change list and freed.  The transaction's size at
 * entry is remembered in "size" and used afterwards both for the memory
 * accounting update and for the spillBytes statistic.  Spill statistics
 * are only touched when at least one change was written.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (3992), several local declarations (3994-3995, 3997), the subtransaction
 * loop (4005, 4007, 4009-4010), the change loop header (4014), the segment
 * switch condition and file handling (4025, 4030, 4032, 4038, 4043, 4047),
 * the stats update (4071), the statements at 4075 and 4077, and the final
 * close (4080) are missing from this listing.
3990 */
3991static void
3993{
3996 int fd = -1;
3998 Size spilled = 0;
3999 Size size = txn->size;
4000
4001 elog(DEBUG2, "spill %u changes in XID %u to disk",
4002 (uint32) txn->nentries_mem, txn->xid);
4003
4004 /* do the same to all child TXs */
4006 {
4008
4011 }
4012
4013 /* serialize changestream */
4015 {
4016 ReorderBufferChange *change;
4017
4018 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4019
4020 /*
4021 * Store the change in the segment it belongs to by start LSN; don't
4022 * split it over multiple segments, though.
4023 */
4024 if (fd == -1 ||
4026 {
4027 char path[MAXPGPATH];
4028
4029 if (fd != -1)
4031
4033
4034 /*
4035 * No need to care about TLIs here, only used during a single run,
4036 * so each LSN only maps to a specific WAL record.
4037 */
4039 curOpenSegNo);
4040
4041 /* open segment, create it if necessary */
4042 fd = OpenTransientFile(path,
4044
4045 if (fd < 0)
4046 ereport(ERROR,
4048 errmsg("could not open file \"%s\": %m", path)));
4049 }
4050
4051 ReorderBufferSerializeChange(rb, txn, fd, change);
4052 dlist_delete(&change->node);
4053 ReorderBufferFreeChange(rb, change, false);
4054
4055 spilled++;
4056 }
4057
4058 /* Update the memory counter */
4059 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4060
4061 /* update the statistics iff we have spilled anything */
4062 if (spilled)
4063 {
4064 rb->spillCount += 1;
4065 rb->spillBytes += size;
4066
4067 /* don't consider already serialized transactions */
4068 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4069
4070 /* update the decoding stats */
4072 }
4073
 /* every change that was in memory must have been written out */
4074 Assert(spilled == txn->nentries_mem);
4076 txn->nentries_mem = 0;
4078
4079 if (fd != -1)
4081}
4082
4083/*
4084 * Serialize individual change to disk.
 *
 * The record is assembled in rb->outbuf.  It starts with a
 * ReorderBufferDiskChange whose "change" member is a byte copy of the
 * in-memory ReorderBufferChange and whose "size" member is the total
 * record length; action-specific payload follows directly after it.  The
 * finished record is written to fd with a single write() call, and a
 * short or failed write is reported as an ERROR.
 *
 * Payload layouts visible in this function:
 *   tuple changes: for the old and then the new tuple, if present, a
 *     HeapTupleData followed by t_len bytes of tuple data
 *   message: Size prefix_size, the prefix including its terminating NUL,
 *     Size message_size, the message bytes
 *   invalidations: inval_size bytes (the copy itself is on a missing line)
 *   snapshot: SnapshotData, then xcnt and subxcnt TransactionIds
 *   truncate: nrelids Oids
 *
 * NOTE(review): many lines are missing from this listing, including the
 * line carrying the function name and leading parameters (4087), the
 * declarations of "ondisk" and "sz" (4090-4091), the initial buffer
 * reservation (4093), every "case" label, the buffer reservation calls
 * inside the cases, and parts of the write error handling.
4085 */
4086static void
4088 int fd, ReorderBufferChange *change)
4089{
4092
4094
4095 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4096 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4097
4098 switch (change->action)
4099 {
4100 /* fall through these, they're all similar enough */
4105 {
4106 char *data;
4108 newtup;
4109 Size oldlen = 0;
4110 Size newlen = 0;
4111
4112 oldtup = change->data.tp.oldtuple;
4113 newtup = change->data.tp.newtuple;
4114
4115 if (oldtup)
4116 {
4117 sz += sizeof(HeapTupleData);
4118 oldlen = oldtup->t_len;
4119 sz += oldlen;
4120 }
4121
4122 if (newtup)
4123 {
4124 sz += sizeof(HeapTupleData);
4125 newlen = newtup->t_len;
4126 sz += newlen;
4127 }
4128
4129 /* make sure we have enough space */
4131
4132 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4133 /* might have been reallocated above */
4134 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4135
 /*
 * NOTE(review): space is reserved when the tuple pointer is set, but
 * the copy is keyed on a non-zero length; presumably t_len is never
 * zero for a present tuple - confirm.
 */
4136 if (oldlen)
4137 {
4138 memcpy(data, oldtup, sizeof(HeapTupleData));
4139 data += sizeof(HeapTupleData);
4140
4141 memcpy(data, oldtup->t_data, oldlen);
4142 data += oldlen;
4143 }
4144
4145 if (newlen)
4146 {
4147 memcpy(data, newtup, sizeof(HeapTupleData));
4148 data += sizeof(HeapTupleData);
4149
4150 memcpy(data, newtup->t_data, newlen);
4151 data += newlen;
4152 }
4153 break;
4154 }
4156 {
4157 char *data;
 /* the stored prefix length includes the terminating NUL */
4158 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4159
4160 sz += prefix_size + change->data.msg.message_size +
4161 sizeof(Size) + sizeof(Size);
4163
4164 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4165
4166 /* might have been reallocated above */
4167 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4168
4169 /* write the prefix including the size */
4170 memcpy(data, &prefix_size, sizeof(Size));
4171 data += sizeof(Size);
4172 memcpy(data, change->data.msg.prefix,
4173 prefix_size);
4174 data += prefix_size;
4175
4176 /* write the message including the size */
4177 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4178 data += sizeof(Size);
4179 memcpy(data, change->data.msg.message,
4180 change->data.msg.message_size);
4181 data += change->data.msg.message_size;
4182
4183 break;
4184 }
4186 {
4187 char *data;
4189 change->data.inval.ninvalidations;
4190
4191 sz += inval_size;
4192
4194 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4195
4196 /* might have been reallocated above */
4197 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4199 data += inval_size;
4200
4201 break;
4202 }
4204 {
4205 Snapshot snap;
4206 char *data;
4207
4208 snap = change->data.snapshot;
4209
4210 sz += sizeof(SnapshotData) +
4211 sizeof(TransactionId) * snap->xcnt +
4212 sizeof(TransactionId) * snap->subxcnt;
4213
4214 /* make sure we have enough space */
4216 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4217 /* might have been reallocated above */
4218 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4219
4220 memcpy(data, snap, sizeof(SnapshotData));
4221 data += sizeof(SnapshotData);
4222
4223 if (snap->xcnt)
4224 {
4225 memcpy(data, snap->xip,
4226 sizeof(TransactionId) * snap->xcnt);
4227 data += sizeof(TransactionId) * snap->xcnt;
4228 }
4229
4230 if (snap->subxcnt)
4231 {
4232 memcpy(data, snap->subxip,
4233 sizeof(TransactionId) * snap->subxcnt);
4234 data += sizeof(TransactionId) * snap->subxcnt;
4235 }
4236 break;
4237 }
4239 {
4240 Size size;
4241 char *data;
4242
4243 /* account for the OIDs of truncated relations */
4244 size = sizeof(Oid) * change->data.truncate.nrelids;
4245 sz += size;
4246
4247 /* make sure we have enough space */
4249
4250 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4251 /* might have been reallocated above */
4252 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4253
4254 memcpy(data, change->data.truncate.relids, size);
4255 data += size;
4256
4257 break;
4258 }
4263 /* ReorderBufferChange contains everything important */
4264 break;
4265 }
4266
 /* record the total record length in the on-disk header */
4267 ondisk->size = sz;
4268
 /* clear errno so that a short write that leaves it unset can be detected */
4269 errno = 0;
4271 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4272 {
 /* preserve errno across the statement on the (missing) next line */
4273 int save_errno = errno;
4274
4276
4277 /* if write didn't set errno, assume problem is no disk space */
4279 ereport(ERROR,
4281 errmsg("could not write to data file for XID %u: %m",
4282 txn->xid)));
4283 }
4285
4286 /*
4287 * Keep the transaction's final_lsn up to date with each change we send to
4288 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4289 * only do this on commit and abort records, but that doesn't work if a
4290 * system crash leaves a transaction without its abort record).
4291 *
4292 * Make sure not to move it backwards.
4293 */
4294 if (txn->final_lsn < change->lsn)
4295 txn->final_lsn = change->lsn;
4296
4297 Assert(ondisk->change.action == change->action);
4298}
4299
4300/*
 * Returns true, if the output plugin supports streaming, false, otherwise.
 *
 * The answer is the "streaming" flag of the LogicalDecodingContext stored
 * in rb->private_data.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4302) is missing from this listing.
 */
4301static inline bool
4303{
4304 LogicalDecodingContext *ctx = rb->private_data;
4305
4306 return ctx->streaming;
4307}
4308
4309/*
 * Returns true, if the streaming can be started now, false, otherwise.
 *
 * Uses the LogicalDecodingContext stored in rb->private_data and its
 * snapshot builder.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4311), the consistency check (4317) and the first half of the final
 * condition (4325) are missing from this listing.
 */
4310static inline bool
4312{
4313 LogicalDecodingContext *ctx = rb->private_data;
4314 SnapBuild *builder = ctx->snapshot_builder;
4315
4316 /* We can't start streaming unless a consistent state is reached. */
4318 return false;
4319
4320 /*
4321 * We can't start streaming immediately even if the streaming is enabled
4322 * because we previously decoded this transaction and now just are
4323 * restarting.
4324 */
4326 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4327 return true;
4328
4329 return false;
4330}
4331
4332/*
4333 * Send data of a large transaction (and its subtransactions) to the
4334 * output plugin, but using the stream API.
 *
 * txn must be a top-level transaction.  On the first streaming run
 * (txn->snapshot_now is NULL) the snapshot is copied from
 * txn->base_snapshot with FirstCommandId; if there is no base snapshot
 * the function returns without doing anything.  On later runs the
 * snapshot and command id saved by the previous run are used.  Stream
 * statistics are only updated after ReorderBufferProcessTXN() returns.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4337), the subtransaction walk in the first-run branch (4365, 4368,
 * 4371, 4373, 4375, 4377-4378), the "already streamed" assertion (4398),
 * the snapshot free call (4419), the assignment of txn_is_streamed (4429),
 * the stats update call (4443) and the statement at 4445 are missing from
 * this listing.
4335 */
4336static void
4338{
4339 Snapshot snapshot_now;
4340 CommandId command_id;
4341 Size stream_bytes;
4342 bool txn_is_streamed;
4343
4344 /* We can never reach here for a subtransaction. */
4345 Assert(rbtxn_is_toptxn(txn));
4346
4347 /*
4348 * We can't make any assumptions about base snapshot here, similar to what
4349 * ReorderBufferCommit() does. That relies on base_snapshot getting
4350 * transferred from subxact in ReorderBufferCommitChild(), but that was
4351 * not yet called as the transaction is in-progress.
4352 *
4353 * So just walk the subxacts and use the same logic here. But we only need
4354 * to do that once, when the transaction is streamed for the first time.
4355 * After that we need to reuse the snapshot from the previous run.
4356 *
4357 * Unlike DecodeCommit which adds xids of all the subtransactions in
4358 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4359 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4360 * allows the catalog changes made in subtransactions decoded till now to
4361 * be visible.
4362 */
4363 if (txn->snapshot_now == NULL)
4364 {
4366
4367 /* make sure this transaction is streamed for the first time */
4369
4370 /* at the beginning we should have invalid command ID */
4372
4374 {
4376
4379 }
4380
4381 /*
4382 * If this transaction has no snapshot, it didn't make any changes to
4383 * the database till now, so there's nothing to decode.
4384 */
4385 if (txn->base_snapshot == NULL)
4386 {
4387 Assert(txn->ninvalidations == 0);
4388 return;
4389 }
4390
4391 command_id = FirstCommandId;
4392 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4393 txn, command_id);
4394 }
4395 else
4396 {
4397 /* the transaction must have been already streamed */
4399
4400 /*
4401 * We already have a snapshot from the previous streaming run. We
4402 * assume new subxacts can't move the LSN backwards, and so can't beat
4403 * the LSN condition in the previous branch (so no need to walk
4404 * through subxacts again). In fact, we must not do that as we may be
4405 * using snapshot half-way through the subxact.
4406 */
4407 command_id = txn->command_id;
4408
4409 /*
4410 * We can't use txn->snapshot_now directly because after the last
4411 * streaming run, we might have got some new sub-transactions. So we
4412 * need to add them to the snapshot.
4413 */
4414 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4415 txn, command_id);
4416
4417 /* Free the previously copied snapshot. */
4418 Assert(txn->snapshot_now->copied);
4420 txn->snapshot_now = NULL;
4421 }
4422
4423 /*
4424 * Remember this information to be used later to update stats. We can't
4425 * update the stats here as an error while processing the changes would
4426 * lead to the accumulation of stats even though we haven't streamed all
4427 * the changes.
4428 */
4430 stream_bytes = txn->total_size;
4431
4432 /* Process and send the changes to output plugin. */
4433 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4434 command_id, true);
4435
4436 rb->streamCount += 1;
4437 rb->streamBytes += stream_bytes;
4438
4439 /* Don't consider already streamed transaction. */
4440 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4441
4442 /* update the decoding stats */
4444
4446 Assert(txn->nentries == 0);
4447 Assert(txn->nentries_mem == 0);
4448}
4449
4450/*
4451 * Size of a change in memory.
 *
 * Starts from sizeof(ReorderBufferChange) and adds an action-specific
 * amount:
 *   tuple changes: sizeof(HeapTupleData) plus t_len for each of the old
 *     and new tuple that is present
 *   message: the prefix length including its terminating NUL, the message
 *     size and two Size fields
 *   invalidations: ninvalidations SharedInvalidationMessages
 *   snapshot: SnapshotData plus xcnt and subxcnt TransactionIds
 *   truncate: nrelids Oids
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4454), every "case" label and the declaration of oldtup (4466) are
 * missing from this listing.
4452 */
4453static Size
4455{
4456 Size sz = sizeof(ReorderBufferChange);
4457
4458 switch (change->action)
4459 {
4460 /* fall through these, they're all similar enough */
4465 {
4467 newtup;
4468 Size oldlen = 0;
4469 Size newlen = 0;
4470
4471 oldtup = change->data.tp.oldtuple;
4472 newtup = change->data.tp.newtuple;
4473
4474 if (oldtup)
4475 {
4476 sz += sizeof(HeapTupleData);
4477 oldlen = oldtup->t_len;
4478 sz += oldlen;
4479 }
4480
4481 if (newtup)
4482 {
4483 sz += sizeof(HeapTupleData);
4484 newlen = newtup->t_len;
4485 sz += newlen;
4486 }
4487
4488 break;
4489 }
4491 {
4492 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4493
4494 sz += prefix_size + change->data.msg.message_size +
4495 sizeof(Size) + sizeof(Size);
4496
4497 break;
4498 }
4500 {
4501 sz += sizeof(SharedInvalidationMessage) *
4502 change->data.inval.ninvalidations;
4503 break;
4504 }
4506 {
4507 Snapshot snap;
4508
4509 snap = change->data.snapshot;
4510
4511 sz += sizeof(SnapshotData) +
4512 sizeof(TransactionId) * snap->xcnt +
4513 sizeof(TransactionId) * snap->subxcnt;
4514
4515 break;
4516 }
4518 {
4519 sz += sizeof(Oid) * change->data.truncate.nrelids;
4520
4521 break;
4522 }
4527 /* ReorderBufferChange contains everything important */
4528 break;
4529 }
4530
4531 return sz;
4532}
4533
4534
4535/*
4536 * Restore a number of changes spilled to disk back into memory.
 *
 * Reads records from the transaction's spill files, one segment after
 * another starting at *segno (or at the segment of txn->first_lsn when
 * *segno is 0), until max_changes_in_memory changes were restored or
 * last_segno has been passed.  A segment file that does not exist is
 * skipped.  The open file and the read position are kept in *file, and
 * *segno is advanced, so that a later call can continue where this one
 * stopped.  Each record is read in two steps: first the fixed-size
 * ReorderBufferDiskChange header, then the remaining ondisk->size minus
 * header bytes.
 *
 * Returns the number of changes restored by this call.
 *
 * NOTE(review): the line carrying the function name and leading parameters
 * (4539), several declarations (4543-4544, 4547-4548), the loop that frees
 * the current in-memory entries (4551, 4553-4554, 4557), the computation of
 * last_segno (4562), the file open call (4588) and parts of the read calls
 * and error reports are missing from this listing.
4537 */
4538static Size
4540 TXNEntryFile *file, XLogSegNo *segno)
4541{
4542 Size restored = 0;
4545 File *fd = &file->vfd;
4546
4549
4550 /* free current entries, so we have memory for more */
4552 {
4555
4556 dlist_delete(&cleanup->node);
4558 }
4559 txn->nentries_mem = 0;
4561
4563
4564 while (restored < max_changes_in_memory && *segno <= last_segno)
4565 {
4566 int readBytes;
4568
4570
4571 if (*fd == -1)
4572 {
4573 char path[MAXPGPATH];
4574
4575 /* first time in */
4576 if (*segno == 0)
4577 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4578
4579 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4580
4581 /*
4582 * No need to care about TLIs here, only used during a single run,
4583 * so each LSN only maps to a specific WAL record.
4584 */
4586 *segno);
4587
4589
4590 /* No harm in resetting the offset even in case of failure */
4591 file->curOffset = 0;
4592
 /* no spill file for this segment: move on to the next one */
4593 if (*fd < 0 && errno == ENOENT)
4594 {
4595 *fd = -1;
4596 (*segno)++;
4597 continue;
4598 }
4599 else if (*fd < 0)
4600 ereport(ERROR,
4602 errmsg("could not open file \"%s\": %m",
4603 path)));
4604 }
4605
4606 /*
4607 * Read the statically sized part of a change which has information
4608 * about the total size. If we couldn't read a record, we're at the
4609 * end of this file.
4610 */
4612 readBytes = FileRead(file->vfd, rb->outbuf,
4615
4616 /* eof */
4617 if (readBytes == 0)
4618 {
4619 FileClose(*fd);
4620 *fd = -1;
4621 (*segno)++;
4622 continue;
4623 }
4624 else if (readBytes < 0)
4625 ereport(ERROR,
4627 errmsg("could not read from reorderbuffer spill file: %m")));
4628 else if (readBytes != sizeof(ReorderBufferDiskChange))
4629 ereport(ERROR,
4631 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4632 readBytes,
4633 (uint32) sizeof(ReorderBufferDiskChange))));
4634
4635 file->curOffset += readBytes;
4636
4637 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4638
4640 sizeof(ReorderBufferDiskChange) + ondisk->size);
 /* re-read the pointer after the statement above, as in the serializer */
4641 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4642
4643 readBytes = FileRead(file->vfd,
4644 rb->outbuf + sizeof(ReorderBufferDiskChange),
4645 ondisk->size - sizeof(ReorderBufferDiskChange),
4646 file->curOffset,
4648
4649 if (readBytes < 0)
4650 ereport(ERROR,
4652 errmsg("could not read from reorderbuffer spill file: %m")));
4653 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4654 ereport(ERROR,
4656 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4657 readBytes,
4658 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4659
4660 file->curOffset += readBytes;
4661
4662 /*
4663 * ok, read a full change from disk, now restore it into proper
4664 * in-memory format
4665 */
4666 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4667 restored++;
4668 }
4669
4670 return restored;
4671}
4672
4673/*
4674 * Convert change from its on-disk format to in-memory format and queue it onto
4675 * the TXN's ->changes list.
4676 *
4677 * Note: although "data" is declared char*, at entry it points to a
4678 * maxalign'd buffer, making it safe in most of this function to assume
4679 * that the pointed-to data is suitably aligned for direct access.
 *
 * A new ReorderBufferChange is allocated and the fixed part is copied
 * from the on-disk header.  Pointer members of the copied struct are then
 * replaced with freshly allocated copies of the payload that follows the
 * header.  For tuple changes the copied oldtuple/newtuple pointers are
 * only used to tell whether a tuple was stored.  Finally the change is
 * appended to txn->changes and txn->nentries_mem is incremented.
 *
 * NOTE(review): the line carrying the function name and leading parameters
 * (4682), the declaration of "ondisk" (4685), every "case" label, the
 * tuple allocation and tuple data copy statements, the invalidation copy
 * (4785), the snapshot declarations (4791-4792) and the first half of the
 * final memory accounting call (4841) are missing from this listing.
4680 */
4681static void
4683 char *data)
4684{
4686 ReorderBufferChange *change;
4687
4688 ondisk = (ReorderBufferDiskChange *) data;
4689
4690 change = ReorderBufferAllocChange(rb);
4691
4692 /* copy static part */
4693 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4694
 /* the action-specific payload starts right after the fixed-size header */
4695 data += sizeof(ReorderBufferDiskChange);
4696
4697 /* restore individual stuff */
4698 switch (change->action)
4699 {
4700 /* fall through these, they're all similar enough */
4705 if (change->data.tp.oldtuple)
4706 {
4707 uint32 tuplelen = ((HeapTuple) data)->t_len;
4708
4709 change->data.tp.oldtuple =
4711
4712 /* restore ->tuple */
4713 memcpy(change->data.tp.oldtuple, data,
4714 sizeof(HeapTupleData));
4715 data += sizeof(HeapTupleData);
4716
4717 /* reset t_data pointer into the new tuplebuf */
4718 change->data.tp.oldtuple->t_data =
4719 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4720
4721 /* restore tuple data itself */
4723 data += tuplelen;
4724 }
4725
4726 if (change->data.tp.newtuple)
4727 {
4728 /* here, data might not be suitably aligned! */
4730
4732 sizeof(uint32));
4733
4734 change->data.tp.newtuple =
4736
4737 /* restore ->tuple */
4738 memcpy(change->data.tp.newtuple, data,
4739 sizeof(HeapTupleData));
4740 data += sizeof(HeapTupleData);
4741
4742 /* reset t_data pointer into the new tuplebuf */
4743 change->data.tp.newtuple->t_data =
4744 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4745
4746 /* restore tuple data itself */
4748 data += tuplelen;
4749 }
4750
4751 break;
4753 {
4754 Size prefix_size;
4755
4756 /* read prefix */
4757 memcpy(&prefix_size, data, sizeof(Size));
4758 data += sizeof(Size);
4759 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4760 prefix_size);
4761 memcpy(change->data.msg.prefix, data, prefix_size);
4762 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4763 data += prefix_size;
4764
4765 /* read the message */
4766 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4767 data += sizeof(Size);
4768 change->data.msg.message = MemoryContextAlloc(rb->context,
4769 change->data.msg.message_size);
4770 memcpy(change->data.msg.message, data,
4771 change->data.msg.message_size);
4772 data += change->data.msg.message_size;
4773
4774 break;
4775 }
4777 {
4779 change->data.inval.ninvalidations;
4780
4781 change->data.inval.invalidations =
4782 MemoryContextAlloc(rb->context, inval_size);
4783
4784 /* read the message */
4786
4787 break;
4788 }
4790 {
4793 Size size;
4794
4795 oldsnap = (Snapshot) data;
4796
4797 size = sizeof(SnapshotData) +
4798 sizeof(TransactionId) * oldsnap->xcnt +
4799 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4800
4801 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4802
4803 newsnap = change->data.snapshot;
4804
 /*
 * The struct and both xid arrays are copied in one piece; the array
 * pointers are then redirected into the new allocation.
 */
4805 memcpy(newsnap, data, size);
4806 newsnap->xip = (TransactionId *)
4807 (((char *) newsnap) + sizeof(SnapshotData));
4808 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4809 newsnap->copied = true;
4810 break;
4811 }
4812 /*
 * the base struct contains all the data, easy peasy
 *
 * NOTE(review): the case that follows does restore extra data, so this
 * remark presumably belongs to the cases near the end of the switch
 * whose labels are missing from this listing - confirm.
 */
4814 {
4815 Oid *relids;
4816
4817 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4818 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4819 change->data.truncate.relids = relids;
4820
4821 break;
4822 }
4827 break;
4828 }
4829
4830 dlist_push_tail(&txn->changes, &change->node);
4831 txn->nentries_mem++;
4832
4833 /*
4834 * Update memory accounting for the restored change. We need to do this
4835 * although we don't check the memory limit when restoring the changes in
4836 * this branch (we only do that when initially queueing the changes after
4837 * decoding), because we will release the changes later, and that will
4838 * update the accounting too (subtracting the size from the counters). And
4839 * we don't want to underflow there.
4840 */
4842 ReorderBufferChangeSize(change));
4843}
4844
4845/*
4846 * Remove all on-disk data stored for the passed-in transaction.
 *
 * Every segment number from "first" to "last" (inclusive) is tried; a
 * file that does not exist is silently ignored, any other unlink() failure
 * is reported as an ERROR.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4849), the statements that compute "first" and "last" (4855-4856,
 * 4858-4859) and the call that builds "path" (4866) are missing from this
 * listing.
4847 */
4848static void
4850{
4851 XLogSegNo first;
4852 XLogSegNo cur;
4853 XLogSegNo last;
4854
4857
4860
4861 /* iterate over all possible filenames, and delete them */
4862 for (cur = first; cur <= last; cur++)
4863 {
4864 char path[MAXPGPATH];
4865
4867 if (unlink(path) != 0 && errno != ENOENT)
4868 ereport(ERROR,
4870 errmsg("could not remove file \"%s\": %m", path)));
4871 }
4872}
4873
4874/*
4875 * Remove any leftover serialized reorder buffers from a slot directory after a
4876 * prior crash or decoding session exit.
 *
 * slotname is the name of the slot's directory below PG_REPLSLOT_DIR.
 * Directory entries whose name starts with "xid" are unlinked; a failure
 * to unlink one of them is reported as an ERROR.  Nothing is done if the
 * path exists but is not a directory.
 *
 * NOTE(review): part of the error report (4904) and the statement after
 * the loop (4909) are missing from this listing.
4877 */
4878static void
4879ReorderBufferCleanupSerializedTXNs(const char *slotname)
4880{
4881 DIR *spill_dir;
4882 struct dirent *spill_de;
4883 struct stat statbuf;
4884 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4885
 /* NOTE(review): unbounded sprintf; relies on slotname fitting in path */
4886 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4887
4888 /* we're only handling directories here, skip if it's not ours */
 /* a failing lstat() does not return here; it falls through to AllocateDir() */
4889 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4890 return;
4891
4892 spill_dir = AllocateDir(path);
 /*
 * NOTE(review): "path" is passed to ReadDirExtended() as the directory
 * name on every iteration, but the loop body overwrites it with the path
 * of the file being removed - presumably it is only used for messages;
 * confirm.
 */
4893 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4894 {
4895 /* only look at names that can be ours */
4896 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4897 {
4898 snprintf(path, sizeof(path),
4899 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4900 spill_de->d_name);
4901
4902 if (unlink(path) != 0)
4903 ereport(ERROR,
4905 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4906 path, PG_REPLSLOT_DIR, slotname)));
4907 }
4908 }
4910}
4911
4912/*
4913 * Given a replication slot, transaction ID and segment number, fill in the
4914 * corresponding spill file into 'path', which is a caller-owned buffer of size
4915 * at least MAXPGPATH.
 *
 * The file name has the form "xid-<xid>-lsn-<hi>-<lo>.spill" below two
 * directory components, and the result is truncated to MAXPGPATH bytes by
 * snprintf().
 *
 * NOTE(review): the line carrying the function name and leading parameters
 * (4918), the declaration and computation of "recptr" (4921, 4923) and the
 * first two format arguments (4926-4927) are missing from this listing.
4916 */
4917static void
4919 XLogSegNo segno)
4920{
4922
4924
4925 snprintf(path, MAXPGPATH, "%s/%s/xid-%u-lsn-%X-%X.spill",
4928 xid, LSN_FORMAT_ARGS(recptr));
4929}
4930
4931/*
4932 * Delete all data spilled to disk after we've restarted/crashed. It will be
4933 * recreated when the respective slots are reused.
 *
 * Directory entries "." and ".." and entries whose name is not a valid
 * replication slot name are skipped.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4936), the directory handle declaration and open (4938, 4941), the loop
 * header (4942), the per-slot cleanup call (4956) and the statement after
 * the loop (4958) are missing from this listing.
4934 */
4935void
4937{
4939 struct dirent *logical_de;
4940
4943 {
4944 if (strcmp(logical_de->d_name, ".") == 0 ||
4945 strcmp(logical_de->d_name, "..") == 0)
4946 continue;
4947
4948 /* if it cannot be a slot, skip the directory */
4949 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4950 continue;
4951
4952 /*
4953 * ok, has to be a surviving logical slot, iterate and delete
4954 * everything starting with xid-*
4955 */
4957 }
4959}
4960
4961/* ---------------------------------------
4962 * toast reassembly support
4963 * ---------------------------------------
4964 */
4965
4966/*
4967 * Initialize per tuple toast reconstruction support.
 *
 * Creates txn->toast_hash, which must not exist yet.  The table is keyed
 * by Oid, stores ReorderBufferToastEnt entries and uses rb->context as its
 * memory context.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (4970), the declaration of hash_ctl (4972) and the flags argument of
 * hash_create() (4980) are missing from this listing.
4968 */
4969static void
4971{
4973
4974 Assert(txn->toast_hash == NULL);
4975
4976 hash_ctl.keysize = sizeof(Oid);
4977 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4978 hash_ctl.hcxt = rb->context;
4979 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4981}
4982
4983/*
4984 * Per toast-chunk handling for toast reconstruction
4985 *
4986 * Appends a toast chunk so we can reconstruct it when the tuple "owning" the
4987 * toasted Datum comes along.
4988 */
4989static void
4991 Relation relation, ReorderBufferChange *change)
4992{
4995 bool found;
4997 bool isnull;
4998 Pointer chunk;
4999 TupleDesc desc = RelationGetDescr(relation);
5000 Oid chunk_id;
5002
5003 if (txn->toast_hash == NULL)
5005
5006 Assert(IsToastRelation(relation));
5007
5008 newtup = change->data.tp.newtuple;
5009 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5010 Assert(!isnull);
5011 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5012 Assert(!isnull);
5013
5015 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5016
5017 if (!found)
5018 {
5019 Assert(ent->chunk_id == chunk_id);
5020 ent->num_chunks = 0;
5021 ent->last_chunk_seq = 0;
5022 ent->size = 0;
5023 ent->reconstructed = NULL;
5024 dlist_init(&ent->chunks);
5025
5026 if (chunk_seq != 0)
5027 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5028 chunk_seq, chunk_id);
5029 }
5030 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5031 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5032 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5033
5034 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5035 Assert(!isnull);
5036
5037 /* calculate size so we can allocate the right size at once later */
5038 if (!VARATT_IS_EXTENDED(chunk))
5039 chunksize = VARSIZE(chunk) - VARHDRSZ;
5040 else if (VARATT_IS_SHORT(chunk))
5041 /* could happen due to heap_form_tuple doing its thing */
5043 else
5044 elog(ERROR, "unexpected type of toast chunk");
5045
5046 ent->size += chunksize;
5047 ent->last_chunk_seq = chunk_seq;
5048 ent->num_chunks++;
5049 dlist_push_tail(&ent->chunks, &change->node);
5050}
5051
5052/*
5053 * Rejigger change->newtuple to point to in-memory toast tuples instead of
5054 * on-disk toast tuples that may no longer exist (think DROP TABLE or VACUUM).
5055 *
5056 * We cannot replace unchanged toast tuples though, so those will still point
5057 * to on-disk toast data.
5058 *
5059 * While updating the existing change with detoasted tuple data, we need to
5060 * update the memory accounting info, because the change size will differ.
5061 * Otherwise the accounting may get out of sync, triggering serialization
5062 * at unexpected times.
5063 *
5064 * We simply subtract size of the change before rejiggering the tuple, and
5065 * then add the new size. This makes it look like the change was removed
5066 * and then added back, except it only tweaks the accounting info.
5067 *
5068 * In particular it can't trigger serialization, which would be pointless
5069 * anyway as it happens during commit processing right before handing
5070 * the change to the output plugin.
5071 */
5072static void
5074 Relation relation, ReorderBufferChange *change)
5075{
5076 TupleDesc desc;
5077 int natt;
5078 Datum *attrs;
5079 bool *isnull;
5080 bool *free;
5082 Relation toast_rel;
5084 MemoryContext oldcontext;
5086 Size old_size;
5087
5088 /* no toast tuples changed */
5089 if (txn->toast_hash == NULL)
5090 return;
5091
5092 /*
5093 * We're going to modify the size of the change. So, to make sure the
5094 * accounting is correct we record the current change size and then after
5095 * re-computing the change we'll subtract the recorded size and then
5096 * re-add the new change size at the end. We don't immediately subtract
5097 * the old size because if there is any error before we add the new size,
5098 * we will release the changes and that will update the accounting info
5099 * (subtracting the size from the counters). And we don't want to
5100 * underflow there.
5101 */
5103
5104 oldcontext = MemoryContextSwitchTo(rb->context);
5105
5106 /* we should only have toast tuples in an INSERT or UPDATE */
5107 Assert(change->data.tp.newtuple);
5108
5109 desc = RelationGetDescr(relation);
5110
5111 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5112 if (!RelationIsValid(toast_rel))
5113 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5114 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5115
5116 toast_desc = RelationGetDescr(toast_rel);
5117
5118 /* should we allocate from stack instead? */
5119 attrs = palloc0_array(Datum, desc->natts);
5120 isnull = palloc0_array(bool, desc->natts);
5121 free = palloc0_array(bool, desc->natts);
5122
5123 newtup = change->data.tp.newtuple;
5124
5125 heap_deform_tuple(newtup, desc, attrs, isnull);
5126
5127 for (natt = 0; natt < desc->natts; natt++)
5128 {
5132
5133 /* va_rawsize is the size of the original datum -- including header */
5134 varatt_external toast_pointer;
5137 varlena *reconstructed;
5138 dlist_iter it;
5139 Size data_done = 0;
5140
5141 if (attr->attisdropped)
5142 continue;
5143
5144 /* not a varlena datatype */
5145 if (attr->attlen != -1)
5146 continue;
5147
5148 /* no data */
5149 if (isnull[natt])
5150 continue;
5151
5152 /* ok, we know we have a toast datum */
5154
5155 /* no need to do anything if the tuple isn't external */
5157 continue;
5158
5160
5161 /*
5162 * Check whether the toast tuple changed, replace if so.
5163 */
5166 &toast_pointer.va_valueid,
5167 HASH_FIND,
5168 NULL);
5169 if (ent == NULL)
5170 continue;
5171
5172 new_datum =
5174
5175 free[natt] = true;
5176
5177 reconstructed = palloc0(toast_pointer.va_rawsize);
5178
5179 ent->reconstructed = reconstructed;
5180
5181 /* stitch toast tuple back together from its parts */
5182 dlist_foreach(it, &ent->chunks)
5183 {
5184 bool cisnull;
5187 Pointer chunk;
5188
5190 ctup = cchange->data.tp.newtuple;
5192
5193 Assert(!cisnull);
5194 Assert(!VARATT_IS_EXTERNAL(chunk));
5195 Assert(!VARATT_IS_SHORT(chunk));
5196
5197 memcpy(VARDATA(reconstructed) + data_done,
5198 VARDATA(chunk),
5199 VARSIZE(chunk) - VARHDRSZ);
5200 data_done += VARSIZE(chunk) - VARHDRSZ;
5201 }
5202 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5203
5204 /* make sure it's marked as compressed or not */
5205 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5206 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5207 else
5208 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5209
5211 redirect_pointer.pointer = reconstructed;
5212
5215 sizeof(redirect_pointer));
5216
5218 }
5219
5220 /*
5221 * Build tuple in separate memory & copy tuple back into the tuplebuf
5222 * passed to the output plugin. We can't directly heap_fill_tuple() into
5223 * the tuplebuf because attrs[] will point back into the current content.
5224 */
5225 tmphtup = heap_form_tuple(desc, attrs, isnull);
5226 Assert(newtup->t_len <= MaxHeapTupleSize);
5227 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5228
5229 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5230 newtup->t_len = tmphtup->t_len;
5231
5232 /*
5233 * free resources we won't further need, more persistent stuff will be
5234 * free'd in ReorderBufferToastReset().
5235 */
5236 RelationClose(toast_rel);
5237 pfree(tmphtup);
5238 for (natt = 0; natt < desc->natts; natt++)
5239 {
5240 if (free[natt])
5242 }
5243 pfree(attrs);
5244 pfree(free);
5245 pfree(isnull);
5246
5247 MemoryContextSwitchTo(oldcontext);
5248
5249 /* subtract the old change size */
5251 /* now add the change back, with the correct size */
5253 ReorderBufferChangeSize(change));
5254}
5255
5256/*
5257 * Free all resources allocated for toast reconstruction.
5258 */
5259static void
5261{
5264
5265 if (txn->toast_hash == NULL)
5266 return;
5267
5268 /* sequentially walk over the hash and free everything */
5271 {
5273
5274 if (ent->reconstructed != NULL)
5275 pfree(ent->reconstructed);
5276
5277 dlist_foreach_modify(it, &ent->chunks)
5278 {
5279 ReorderBufferChange *change =
5281
5282 dlist_delete(&change->node);
5283 ReorderBufferFreeChange(rb, change, true);
5284 }
5285 }
5286
5288 txn->toast_hash = NULL;
5289}
5290
5291
5292/* ---------------------------------------
5293 * Visibility support for logical decoding
5294 *
5295 *
5296 * Lookup actual cmin/cmax values when using decoding snapshot. We can't
5297 * always rely on stored cmin/cmax values because of two scenarios:
5298 *
5299 * * A tuple got changed multiple times during a single transaction and thus
5300 * has got a combo CID. Combo CIDs are only valid for the duration of a
5301 * single transaction.
5302 * * A tuple with a cmin but no cmax (and thus no combo CID) got
5303 * deleted/updated in another transaction than the one which created it
5304 * which we are looking at right now. As only one of cmin, cmax or combo CID
5305 * is actually stored in the heap we don't have access to the value we
5306 * need anymore.
5307 *
5308 * To resolve those problems we have a per-transaction hash of (cmin,
5309 * cmax) tuples keyed by (relfilelocator, ctid) which contains the actual
5310 * (cmin, cmax) values. That also takes care of combo CIDs by simply
5311 * not caring about them at all. As we have the real cmin/cmax values
5312 * combo CIDs aren't interesting.
5313 *
5314 * As we only care about catalog tuples here the overhead of this
5315 * hashtable should be acceptable.
5316 *
5317 * Heap rewrites complicate this a bit, check rewriteheap.c for
5318 * details.
5319 * -------------------------------------------------------------------------
5320 */
5321
5322/* struct for sorting mapping files by LSN efficiently */
5323typedef struct RewriteMappingFile
5324{
5326 char fname[MAXPGPATH];
5328
5329#ifdef NOT_USED
5330static void
5332{
5335
5338 {
5339 elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
5340 ent->key.rlocator.dbOid,
5341 ent->key.rlocator.spcOid,
5342 ent->key.rlocator.relNumber,
5343 ItemPointerGetBlockNumber(&ent->key.tid),
5345 ent->cmin,
5346 ent->cmax
5347 );
5348 }
5349}
5350#endif
5351
5352/*
5353 * Apply a single mapping file to tuplecid_data.
5354 *
5355 * The mapping file has to have been verified to be a) committed b) for our
5356 * transaction c) applied in LSN order.
5357 */
5358static void
5359ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
5360{
5361 char path[MAXPGPATH];
5362 int fd;
5363 int readBytes;
5365
5366 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5368 if (fd < 0)
5369 ereport(ERROR,
5371 errmsg("could not open file \"%s\": %m", path)));
5372
5373 while (true)
5374 {
5378 bool found;
5379
5380 /* be careful about padding */
5381 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5382
5383 /* read all mappings till the end of the file */
5385 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5387
5388 if (readBytes < 0)
5389 ereport(ERROR,
5391 errmsg("could not read file \"%s\": %m",
5392 path)));
5393 else if (readBytes == 0) /* EOF */
5394 break;
5395 else if (readBytes != sizeof(LogicalRewriteMappingData))
5396 ereport(ERROR,
5398 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5399 path, readBytes,
5400 (int32) sizeof(LogicalRewriteMappingData))));
5401
5402 key.rlocator = map.old_locator;
5404 &key.tid);
5405
5406
5409
5410 /* no existing mapping, no need to update */
5411 if (!ent)
5412 continue;
5413
5414 key.rlocator = map.new_locator;
5416 &key.tid);
5417
5419 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5420
5421 if (found)
5422 {
5423 /*
5424 * Make sure the existing mapping makes sense. We sometimes update
5425 * old records that did not yet have a cmax (e.g. pg_class' own
5426 * entry while rewriting it) during rewrites, so allow that.
5427 */
5428 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5429 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5430 }
5431 else
5432 {
5433 /* update mapping */
5434 new_ent->cmin = ent->cmin;
5435 new_ent->cmax = ent->cmax;
5436 new_ent->combocid = ent->combocid;
5437 }
5438 }
5439
5440 if (CloseTransientFile(fd) != 0)
5441 ereport(ERROR,
5443 errmsg("could not close file \"%s\": %m", path)));
5444}
5445
5446
5447/*
5448 * Check whether the TransactionId 'xid' is in the pre-sorted array 'xip'.
5449 */
5450static bool
5452{
5453 return bsearch(&xid, xip, num,
5454 sizeof(TransactionId), xidComparator) != NULL;
5455}
5456
5457/*
5458 * list_sort() comparator for sorting RewriteMappingFiles in LSN order.
5459 */
5460static int
5462{
5465
5466 return pg_cmp_u64(a->lsn, b->lsn);
5467}
5468
5469/*
5470 * Apply any existing logical remapping files if there are any targeted at our
5471 * transaction for relid.
5472 */
5473static void
5475{
5477 struct dirent *mapping_de;
5478 List *files = NIL;
5479 ListCell *file;
5480 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5481
5484 {
5485 Oid f_dboid;
5486 Oid f_relid;
5490 uint32 f_hi,
5491 f_lo;
5493
5494 if (strcmp(mapping_de->d_name, ".") == 0 ||
5495 strcmp(mapping_de->d_name, "..") == 0)
5496 continue;
5497
5498 /* Ignore files that aren't ours */
5499 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5500 continue;
5501
5503 &f_dboid, &f_relid, &f_hi, &f_lo,
5504 &f_mapped_xid, &f_create_xid) != 6)
5505 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5506
5507 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5508
5509 /* mapping for another database */
5510 if (f_dboid != dboid)
5511 continue;
5512
5513 /* mapping for another relation */
5514 if (f_relid != relid)
5515 continue;
5516
5517 /* did the creating transaction abort? */
5519 continue;
5520
5521 /* not for our transaction */
5522 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5523 continue;
5524
5525 /* ok, relevant, queue for apply */
5527 f->lsn = f_lsn;
5528 strcpy(f->fname, mapping_de->d_name);
5529 files = lappend(files, f);
5530 }
5532
5533 /* sort files so we apply them in LSN order */
5535
5536 foreach(file, files)
5537 {
5539
5540 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5541 snapshot->subxip[0]);
5543 pfree(f);
5544 }
5545}
5546
5547/*
5548 * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on
5549 * combo CIDs.
5550 */
5551bool
5553 Snapshot snapshot,
5554 HeapTuple htup, Buffer buffer,
5555 CommandId *cmin, CommandId *cmax)
5556{
5559 ForkNumber forkno;
5560 BlockNumber blockno;
5561 bool updated_mapping = false;
5562
5563 /*
5564 * Return unresolved if tuplecid_data is not valid. That's because when
5565 * streaming in-progress transactions we may run into tuples with the CID
5566 * before actually decoding them. Think e.g. about INSERT followed by
5567 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5568 * INSERT. So in such cases, we assume the CID is from the future
5569 * command.
5570 */
5571 if (tuplecid_data == NULL)
5572 return false;
5573
5574 /* be careful about padding */
5575 memset(&key, 0, sizeof(key));
5576
5577 Assert(!BufferIsLocal(buffer));
5578
5579 /*
5580 * get relfilelocator from the buffer, no convenient way to access it
5581 * other than that.
5582 */
5583 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5584
5585 /* tuples can only be in the main fork */
5586 Assert(forkno == MAIN_FORKNUM);
5587 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5588
5589 ItemPointerCopy(&htup->t_self,
5590 &key.tid);
5591
5592restart:
5595
5596 /*
5597 * failed to find a mapping, check whether the table was rewritten and
5598 * apply mapping if so, but only do that once - there can be no new
5599 * mappings while we are in here since we have to hold a lock on the
5600 * relation.
5601 */
5602 if (ent == NULL && !updated_mapping)
5603 {
5605 /* now check but don't update for a mapping again */
5606 updated_mapping = true;
5607 goto restart;
5608 }
5609 else if (ent == NULL)
5610 return false;
5611
5612 if (cmin)
5613 *cmin = ent->cmin;
5614 if (cmax)
5615 *cmax = ent->cmax;
5616 return true;
5617}
5618
5619/*
5620 * Count invalidation messages of specified transaction.
5621 *
5622 * Returns number of messages, and msgs is set to the pointer of the linked
5623 * list for the messages.
5624 */
5625uint32
5628{
5629 ReorderBufferTXN *txn;
5630
5631 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5632 false);
5633
5634 if (txn == NULL)
5635 return 0;
5636
5637 *msgs = txn->invalidations;
5638
5639 return txn->ninvalidations;
5640}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
uint32 BlockNumber
Definition block.h:31
static int32 next
Definition blutils.c:225
static void cleanup(void)
Definition bootstrap.c:886
int Buffer
Definition buf.h:23
#define BufferIsLocal(buffer)
Definition buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4476
#define NameStr(name)
Definition c.h:835
#define InvalidCommandId
Definition c.h:753
#define VARHDRSZ
Definition c.h:781
#define Assert(condition)
Definition c.h:943
#define PG_BINARY
Definition c.h:1386
#define FirstCommandId
Definition c.h:752
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define pg_fallthrough
Definition c.h:161
void * Pointer
Definition c.h:615
uint32 CommandId
Definition c.h:750
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
bool IsToastRelation(Relation relation)
Definition catalog.c:206
bool IsSharedRelation(Oid relationId)
Definition catalog.c:304
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
#define INDIRECT_POINTER_SIZE
Definition detoast.h:34
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition detoast.h:22
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:360
void hash_destroy(HTAB *hashp)
Definition dynahash.c:802
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
struct cursor * cur
Definition ecpg.c:29
Datum arg
Definition elog.c:1323
void FreeErrorData(ErrorData *edata)
Definition elog.c:2014
int errcode_for_file_access(void)
Definition elog.c:898
ErrorData * CopyErrorData(void)
Definition elog.c:1942
void FlushErrorState(void)
Definition elog.c:2063
#define PG_RE_THROW()
Definition elog.h:407
#define DEBUG3
Definition elog.h:29
#define PG_TRY(...)
Definition elog.h:374
#define DEBUG2
Definition elog.h:30
#define PG_END_TRY(...)
Definition elog.h:399
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define PG_CATCH(...)
Definition elog.h:384
#define elog(elevel,...)
Definition elog.h:228
#define INFO
Definition elog.h:35
#define ereport(elevel,...)
Definition elog.h:152
struct SnapshotData * Snapshot
Definition execnodes.h:60
int FreeDir(DIR *dir)
Definition fd.c:3009
int CloseTransientFile(int fd)
Definition fd.c:2855
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition fd.c:2972
void FileClose(File file)
Definition fd.c:1966
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1563
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
static ssize_t FileRead(File file, void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:225
int File
Definition fd.h:51
#define palloc_object(type)
Definition fe_memutils.h:89
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:94
#define palloc_array(type, count)
Definition fe_memutils.h:91
#define palloc0_array(type, count)
Definition fe_memutils.h:92
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition generation.c:162
Oid MyDatabaseId
Definition globals.c:96
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition heaptuple.c:1254
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
#define HASH_CONTEXT
Definition hsearch.h:97
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_BLOBS
Definition hsearch.h:92
#define HEAPTUPLESIZE
Definition htup.h:73
HeapTupleData * HeapTuple
Definition htup.h:71
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
#define SizeofHeapTupleHeader
#define MaxHeapTupleSize
static Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static bool dlist_has_next(const dlist_head *head, const dlist_node *node)
Definition ilist.h:503
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition ilist.h:709
static void dlist_insert_before(dlist_node *before, dlist_node *node)
Definition ilist.h:393
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static dlist_node * dlist_next_node(dlist_head *head, dlist_node *node)
Definition ilist.h:537
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
Definition inval.c:823
void InvalidateSystemCaches(void)
Definition inval.c:916
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_sort(List *list, list_sort_comparator cmp)
Definition list.c:1674
void UpdateDecodingStats(LogicalDecodingContext *ctx)
Definition logical.c:1951
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1235
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1269
char * pstrdup(const char *in)
Definition mcxt.c:1910
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1635
void pfree(void *pointer)
Definition mcxt.c:1619
void * palloc0(Size size)
Definition mcxt.c:1420
void * palloc(Size size)
Definition mcxt.c:1390
MemoryContext CurrentMemoryContext
Definition mcxt.c:161
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:475
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define SLAB_DEFAULT_BLOCK_SIZE
Definition memutils.h:189
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
static char * errmsg
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
pairingheap * pairingheap_allocate(pairingheap_comparator compare, void *arg)
Definition pairingheap.c:42
pairingheap_node * pairingheap_first(pairingheap *heap)
#define pairingheap_container(type, membername, ptr)
Definition pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition pairingheap.h:51
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:138
#define MAXPGPATH
const void * data
#define lfirst(lc)
Definition pg_list.h:172
#define NIL
Definition pg_list.h:68
#define sprintf
Definition port.h:263
#define snprintf
Definition port.h:261
#define qsort(a, b, c, d)
Definition port.h:496
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:242
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
static int32 DatumGetInt32(Datum X)
Definition postgres.h:202
#define PointerGetDatum(X)
Definition postgres.h:354
#define InvalidOid
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1393
#define RelationIsLogicallyLogged(relation)
Definition rel.h:721
#define RelationGetDescr(relation)
Definition rel.h:542
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelationIsValid(relation)
Definition rel.h:491
Relation RelationIdGetRelation(Oid relationId)
Definition relcache.c:2089
void RelationClose(Relation relation)
Definition relcache.c:2220
Oid RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
ForkNumber
Definition relpath.h:56
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146
static int file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
void ReorderBufferFreeRelids(ReorderBuffer *rb, Oid *relids)
void ReorderBufferFreeChange(ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static void ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
static void ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferReplay(ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
static void ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
static ReorderBufferTXN * ReorderBufferLargestTXN(ReorderBuffer *rb)
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
static bool ReorderBufferCanStartStreaming(ReorderBuffer *rb)
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
void ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
static int ReorderBufferIterCompare(Datum a, Datum b, void *arg)
static void ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
bool ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferFreeTupleBuf(HeapTuple tuple)
void ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
void ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, char *gid)
uint32 ReorderBufferGetInvalidations(ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
void ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
void ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
TransactionId * ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb)
static void ReorderBufferSaveTXNSnapshot(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
#define IsSpecInsert(action)
static Size ReorderBufferChangeSize(ReorderBufferChange *change)
int logical_decoding_work_mem
static void AssertChangeLsnOrder(ReorderBufferTXN *txn)
static bool ReorderBufferCanStream(ReorderBuffer *rb)
static int ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg)
static void ReorderBufferApplyChange(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
void ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid)
bool ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
static void ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn)
void ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
void ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
void ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
int debug_logical_replication_streaming
void ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
#define IsInsertOrUpdate(action)
static void ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
void ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
static void ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
static void ReorderBufferIterTXNFinish(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
#define CHANGES_THRESHOLD
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN(ReorderBuffer *rb)
static bool ReorderBufferCheckAndTruncateAbortedTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
HeapTuple ReorderBufferAllocTupleBuf(ReorderBuffer *rb, Size tuple_len)
static void AssertTXNLsnOrder(ReorderBuffer *rb)
#define MAX_DISTR_INVAL_MSG_PER_TXN
static void ReorderBufferApplyMessage(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap)
static void ReorderBufferCleanupSerializedTXNs(const char *slotname)
ReorderBufferChange * ReorderBufferAllocChange(ReorderBuffer *rb)
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
static void SetupCheckXidLive(TransactionId xid)
static bool TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
static void ReorderBufferApplyTruncate(ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
static void ReorderBufferProcessPartialChange(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static void ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
static ReorderBufferTXN * ReorderBufferAllocTXN(ReorderBuffer *rb)
static void ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
static void ReorderBufferTransferSnapToParent(ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
static void ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
static ReorderBufferChange * ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
Oid * ReorderBufferAllocRelids(ReorderBuffer *rb, int nrelids)
static void ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
static void ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
static void ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
void ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
void ReorderBufferFree(ReorderBuffer *rb)
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
#define IsSpecConfirmOrAbort(action)
static void ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
static const Size max_changes_in_memory
void StartupReorderBuffer(void)
void ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
static ReorderBufferTXN * ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
static void ReorderBufferMaybeMarkTXNStreamed(ReorderBuffer *rb, ReorderBufferTXN *txn)
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
static void ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
#define rbtxn_is_committed(txn)
#define rbtxn_has_streamable_change(txn)
#define rbtxn_has_catalog_changes(txn)
@ DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE
@ DEBUG_LOGICAL_REP_STREAMING_BUFFERED
#define RBTXN_PREPARE_STATUS_MASK
#define rbtxn_is_serialized_clear(txn)
#define RBTXN_IS_STREAMED
#define rbtxn_is_prepared(txn)
#define RBTXN_HAS_PARTIAL_CHANGE
#define rbtxn_is_streamed(txn)
#define RBTXN_SENT_PREPARE
#define rbtxn_is_toptxn(txn)
#define rbtxn_get_toptxn(txn)
#define rbtxn_is_known_subxact(txn)
#define rbtxn_is_subtxn(txn)
#define RBTXN_HAS_CATALOG_CHANGES
#define RBTXN_IS_COMMITTED
#define PG_LOGICAL_MAPPINGS_DIR
#define RBTXN_DISTR_INVAL_OVERFLOWED
#define RBTXN_IS_SERIALIZED_CLEAR
#define rbtxn_sent_prepare(txn)
#define RBTXN_IS_PREPARED
#define rbtxn_distr_inval_overflowed(txn)
#define RBTXN_SKIPPED_PREPARE
#define RBTXN_HAS_STREAMABLE_CHANGE
@ REORDER_BUFFER_CHANGE_INVALIDATION
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
@ REORDER_BUFFER_CHANGE_MESSAGE
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT
@ REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID
@ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
@ REORDER_BUFFER_CHANGE_TRUNCATE
@ REORDER_BUFFER_CHANGE_DELETE
@ REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT
#define rbtxn_is_aborted(txn)
#define RBTXN_IS_SERIALIZED
#define rbtxn_is_serialized(txn)
#define RBTXN_IS_ABORTED
#define RBTXN_IS_SUBXACT
#define rbtxn_has_partial_change(txn)
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
#define LOGICAL_REWRITE_FORMAT
Definition rewriteheap.h:54
MemoryContext SlabContextCreate(MemoryContext parent, const char *name, Size blockSize, Size chunkSize)
Definition slab.c:322
ReplicationSlot * MyReplicationSlot
Definition slot.c:158
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
Definition slot.c:265
#define PG_REPLSLOT_DIR
Definition slot.h:21
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition snapbuild.c:332
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition snapbuild.c:308
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition snapbuild.c:281
@ SNAPBUILD_CONSISTENT
Definition snapbuild.h:58
void TeardownHistoricSnapshot(bool is_error)
Definition snapmgr.c:1685
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition snapmgr.c:1669
static HTAB * tuplecid_data
Definition snapmgr.c:163
#define free(a)
bool attisdropped
Definition tupdesc.h:78
Definition dirent.c:26
Size keysize
Definition hsearch.h:69
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
Definition pg_list.h:54
XLogReaderState * reader
Definition logical.h:42
struct SnapBuild * snapshot_builder
Definition logical.h:44
ItemPointerData new_tid
Definition rewriteheap.h:40
RelFileLocator old_locator
Definition rewriteheap.h:37
ItemPointerData old_tid
Definition rewriteheap.h:39
RelFileLocator new_locator
Definition rewriteheap.h:38
RelFileNumber relNumber
Form_pg_class rd_rel
Definition rel.h:111
ReorderBufferChangeType action
struct ReorderBufferChange::@120::@125 inval
RelFileLocator rlocator
struct ReorderBufferChange::@120::@124 tuplecid
ItemPointerData tid
struct ReorderBufferTXN * txn
RelFileLocator locator
struct ReorderBufferChange::@120::@122 truncate
ReplOriginId origin_id
struct ReorderBufferChange::@120::@123 msg
struct ReorderBufferChange::@120::@121 tp
SharedInvalidationMessage * invalidations
union ReorderBufferChange::@120 data
ReorderBufferChange change
ReorderBufferChange * change
ReorderBufferTXN * txn
XLogRecPtr restart_decoding_lsn
pairingheap_node txn_node
TimestampTz commit_time
XLogRecPtr base_snapshot_lsn
TransactionId toplevel_xid
dlist_node catchange_node
SharedInvalidationMessage * invalidations
dlist_head tuplecids
XLogRecPtr first_lsn
TimestampTz abort_time
XLogRecPtr final_lsn
void * output_plugin_private
uint32 ninvalidations_distributed
XLogRecPtr origin_lsn
TimestampTz prepare_time
TransactionId xid
ReplOriginId origin_id
dlist_node base_snapshot_node
SharedInvalidationMessage * invalidations_distributed
dlist_head txns_by_base_snapshot_lsn
MemoryContext context
dclist_head catchange_txns
MemoryContext change_context
ReorderBufferTXN * by_txn_last_txn
TransactionId by_txn_last_xid
MemoryContext tup_context
dlist_head toplevel_by_lsn
pairingheap * txn_heap
MemoryContext txn_context
XLogRecPtr current_restart_decoding_lsn
ReplicationSlotPersistentData data
Definition slot.h:213
char fname[MAXPGPATH]
TransactionId xmin
Definition snapshot.h:153
int32 subxcnt
Definition snapshot.h:177
CommandId curcid
Definition snapshot.h:183
uint32 xcnt
Definition snapshot.h:165
TransactionId * subxip
Definition snapshot.h:176
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
int32 va_rawsize
Definition varatt.h:34
Definition c.h:776
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:195
#define VARHDRSZ_SHORT
Definition varatt.h:278
static bool VARATT_IS_SHORT(const void *PTR)
Definition varatt.h:403
static void SET_VARSIZE_COMPRESSED(void *PTR, Size len)
Definition varatt.h:446
static bool VARATT_IS_EXTENDED(const void *PTR)
Definition varatt.h:410
static bool VARATT_IS_EXTERNAL(const void *PTR)
Definition varatt.h:354
static char * VARDATA_EXTERNAL(const void *PTR)
Definition varatt.h:340
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARTAG_EXTERNAL(void *PTR, vartag_external tag)
Definition varatt.h:453
static Size VARATT_EXTERNAL_GET_EXTSIZE(varatt_external toast_pointer)
Definition varatt.h:507
@ VARTAG_INDIRECT
Definition varatt.h:86
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static bool VARATT_EXTERNAL_IS_COMPRESSED(varatt_external toast_pointer)
Definition varatt.h:536
static Size VARSIZE_SHORT(const void *PTR)
Definition varatt.h:312
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:67
static void pgstat_report_wait_end(void)
Definition wait_event.h:83
#define lstat(path, sb)
Definition win32_port.h:275
#define S_ISDIR(m)
Definition win32_port.h:315
bool IsTransactionOrTransactionBlock(void)
Definition xact.c:5040
void BeginInternalSubTransaction(const char *name)
Definition xact.c:4745
TransactionId CheckXidAlive
Definition xact.c:101
void RollbackAndReleaseCurrentSubTransaction(void)
Definition xact.c:4847
void StartTransactionCommand(void)
Definition xact.c:3109
TransactionId GetCurrentTransactionIdIfAny(void)
Definition xact.c:473
TransactionId GetCurrentTransactionId(void)
Definition xact.c:456
void AbortCurrentTransaction(void)
Definition xact.c:3501
int xidComparator(const void *arg1, const void *arg2)
Definition xid.c:152
int wal_segment_size
Definition xlog.c:150
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 ReplOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52

◆ IsSpecConfirmOrAbort

#define IsSpecConfirmOrAbort (   action)
Value:

Definition at line 202 of file reorderbuffer.c.

◆ IsSpecInsert

#define IsSpecInsert (   action)
Value:

Definition at line 198 of file reorderbuffer.c.

◆ MAX_DISTR_INVAL_MSG_PER_TXN

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))

Definition at line 126 of file reorderbuffer.c.

Typedef Documentation

◆ ReorderBufferDiskChange

◆ ReorderBufferIterTXNEntry

◆ ReorderBufferIterTXNState

◆ ReorderBufferToastEnt

◆ ReorderBufferTupleCidEnt

◆ ReorderBufferTupleCidKey

◆ ReorderBufferTXNByIdEnt

◆ RewriteMappingFile

◆ TXNEntryFile

Function Documentation

◆ ApplyLogicalMappingFile()

static void ApplyLogicalMappingFile ( HTAB tuplecid_data,
const char fname 
)
static

Definition at line 5360 of file reorderbuffer.c.

5361{
5362 char path[MAXPGPATH];
5363 int fd;
5364 int readBytes;
5366
5367 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5369 if (fd < 0)
5370 ereport(ERROR,
5372 errmsg("could not open file \"%s\": %m", path)));
5373
5374 while (true)
5375 {
5379 bool found;
5380
5381 /* be careful about padding */
5382 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5383
5384 /* read all mappings till the end of the file */
5386 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5388
5389 if (readBytes < 0)
5390 ereport(ERROR,
5392 errmsg("could not read file \"%s\": %m",
5393 path)));
5394 else if (readBytes == 0) /* EOF */
5395 break;
5396 else if (readBytes != sizeof(LogicalRewriteMappingData))
5397 ereport(ERROR,
5399 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5400 path, readBytes,
5401 (int32) sizeof(LogicalRewriteMappingData))));
5402
5403 key.rlocator = map.old_locator;
5405 &key.tid);
5406
5407
5410
5411 /* no existing mapping, no need to update */
5412 if (!ent)
5413 continue;
5414
5415 key.rlocator = map.new_locator;
5417 &key.tid);
5418
5420 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5421
5422 if (found)
5423 {
5424 /*
5425 * Make sure the existing mapping makes sense. We sometimes update
5426 * old records that did not yet have a cmax (e.g. pg_class' own
5427 * entry while rewriting it) during rewrites, so allow that.
5428 */
5429 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5430 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5431 }
5432 else
5433 {
5434 /* update mapping */
5435 new_ent->cmin = ent->cmin;
5436 new_ent->cmax = ent->cmax;
5437 new_ent->combocid = ent->combocid;
5438 }
5439 }
5440
5441 if (CloseTransientFile(fd) != 0)
5442 ereport(ERROR,
5444 errmsg("could not close file \"%s\": %m", path)));
5445}

References Assert, CloseTransientFile(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), HASH_ENTER, HASH_FIND, hash_search(), InvalidCommandId, ItemPointerCopy(), MAXPGPATH, LogicalRewriteMappingData::new_locator, LogicalRewriteMappingData::new_tid, LogicalRewriteMappingData::old_locator, LogicalRewriteMappingData::old_tid, OpenTransientFile(), PG_BINARY, PG_LOGICAL_MAPPINGS_DIR, pgstat_report_wait_end(), pgstat_report_wait_start(), read, sprintf, and tuplecid_data.

Referenced by UpdateLogicalMappings().

◆ AssertChangeLsnOrder()

static void AssertChangeLsnOrder ( ReorderBufferTXN txn)
static

Definition at line 1014 of file reorderbuffer.c.

1015{
1016#ifdef USE_ASSERT_CHECKING
1017 dlist_iter iter;
1019
1020 dlist_foreach(iter, &txn->changes)
1021 {
1023
1025
1028 Assert(txn->first_lsn <= cur_change->lsn);
1029
1030 if (XLogRecPtrIsValid(txn->end_lsn))
1031 Assert(cur_change->lsn <= txn->end_lsn);
1032
1034
1035 prev_lsn = cur_change->lsn;
1036 }
1037#endif
1038}

References Assert, ReorderBufferTXN::changes, dlist_iter::cur, dlist_container, dlist_foreach, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::first_lsn, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit().

◆ AssertTXNLsnOrder()

static void AssertTXNLsnOrder ( ReorderBuffer rb)
static

Definition at line 943 of file reorderbuffer.c.

944{
945#ifdef USE_ASSERT_CHECKING
946 LogicalDecodingContext *ctx = rb->private_data;
947 dlist_iter iter;
950
951 /*
952 * Skip the verification if we haven't yet reached the LSN at which we
953 * start decoding the contents of transactions, because until we reach that
954 * LSN, we could have transactions that don't have the association between
955 * the top-level transaction and subtransaction yet and consequently have
956 * the same LSN. We don't guarantee this association until we try to
957 * decode the actual contents of transaction. The ordering of the records
958 * prior to the start_decoding_at LSN should have been checked before the
959 * restart.
960 */
962 return;
963
964 dlist_foreach(iter, &rb->toplevel_by_lsn)
965 {
967 iter.cur);
968
969 /* start LSN must be set */
970 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
971
972 /* If there is an end LSN, it must not be lower than the start LSN */
973 if (XLogRecPtrIsValid(cur_txn->end_lsn))
974 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
975
976 /* Current initial LSN must be strictly higher than previous */
979
980 /* known-as-subtxn txns must not be listed */
982
983 prev_first_lsn = cur_txn->first_lsn;
984 }
985
986 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
987 {
989 base_snapshot_node,
990 iter.cur);
991
992 /* base snapshot (and its LSN) must be set */
993 Assert(cur_txn->base_snapshot != NULL);
994 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
995
996 /* current LSN must be strictly higher than previous */
998 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
999
1000 /* known-as-subtxn txns must not be listed */
1002
1003 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1004 }
1005#endif
1006}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, XLogReaderState::EndRecPtr, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, LogicalDecodingContext::reader, SnapBuildXactNeedsSkip(), LogicalDecodingContext::snapshot_builder, and XLogRecPtrIsValid.

Referenced by ReorderBufferAssignChild(), ReorderBufferGetOldestTXN(), ReorderBufferGetOldestXmin(), ReorderBufferSetBaseSnapshot(), and ReorderBufferTXNByXid().

◆ file_sort_by_lsn()

static int file_sort_by_lsn ( const ListCell a_p,
const ListCell b_p 
)
static

Definition at line 5462 of file reorderbuffer.c.

5463{
5466
5467 return pg_cmp_u64(a->lsn, b->lsn);
5468}

References a, b, fb(), lfirst, and pg_cmp_u64().

Referenced by UpdateLogicalMappings().

◆ ReorderBufferAbort()

void ReorderBufferAbort ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
TimestampTz  abort_time 
)

Definition at line 3083 of file reorderbuffer.c.

3085{
3086 ReorderBufferTXN *txn;
3087
3088 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3089 false);
3090
3091 /* unknown, nothing to remove */
3092 if (txn == NULL)
3093 return;
3094
3095 txn->abort_time = abort_time;
3096
3097 /* For streamed transactions notify the remote node about the abort. */
3098 if (rbtxn_is_streamed(txn))
3099 {
3100 rb->stream_abort(rb, txn, lsn);
3101
3102 /*
3103 * We might have decoded changes for this transaction that could load
3104 * the cache as per the current transaction's view (consider DDL that
3105 * happened in this transaction). We don't want the decoding of future
3106 * transactions to use those cache entries so execute only the inval
3107 * messages in this transaction.
3108 */
3109 if (txn->ninvalidations > 0)
3111 txn->invalidations);
3112 }
3113
3114 /* cosmetic... */
3115 txn->final_lsn = lsn;
3116
3117 /* remove potential on-disk data, and deallocate */
3119}

References ReorderBufferTXN::abort_time, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeAbort().

◆ ReorderBufferAbortOld()

void ReorderBufferAbortOld ( ReorderBuffer rb,
TransactionId  oldestRunningXid 
)

Definition at line 3129 of file reorderbuffer.c.

3130{
3132
3133 /*
3134 * Iterate through all (potential) toplevel TXNs and abort all that are
3135 * older than what possibly can be running. Once we've found the first
3136 * that is alive we stop; there might be some that acquired an xid earlier
3137 * but started writing later, but it's unlikely and they will be cleaned
3138 * up in a later call to this function.
3139 */
3140 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3141 {
3142 ReorderBufferTXN *txn;
3143
3144 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3145
3146 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3147 {
3148 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3149
3150 /* Notify the remote node about the crash/immediate restart. */
3151 if (rbtxn_is_streamed(txn))
3152 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3153
3154 /* remove potential on-disk data, and deallocate this tx */
3156 }
3157 else
3158 return;
3159 }
3160}

References DEBUG2, dlist_container, dlist_foreach_modify, elog, fb(), InvalidXLogRecPtr, rbtxn_is_streamed, ReorderBufferCleanupTXN(), TransactionIdPrecedes(), and ReorderBufferTXN::xid.

Referenced by standby_decode().

◆ ReorderBufferAccumulateInvalidations()

static void ReorderBufferAccumulateInvalidations ( SharedInvalidationMessage **  invals_out,
uint32 ninvals_out,
SharedInvalidationMessage msgs_new,
Size  nmsgs_new 
)
static

Definition at line 3501 of file reorderbuffer.c.

3505{
3506 if (*ninvals_out == 0)
3507 {
3511 }
3512 else
3513 {
3514 /* Enlarge the array of inval messages */
3515 *invals_out =
3517 (*ninvals_out + nmsgs_new));
3521 }
3522}

References fb(), memcpy(), palloc_array, and repalloc_array.

Referenced by ReorderBufferAddDistributedInvalidations(), and ReorderBufferAddInvalidations().

◆ ReorderBufferAddDistributedInvalidations()

void ReorderBufferAddDistributedInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3579 of file reorderbuffer.c.

3582{
3583 ReorderBufferTXN *txn;
3584 MemoryContext oldcontext;
3585
3586 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3587
3588 oldcontext = MemoryContextSwitchTo(rb->context);
3589
3590 /*
3591 * Collect all the invalidations under the top transaction, if available,
3592 * so that we can execute them all together. See comments
3593 * atop ReorderBufferAddInvalidations.
3594 */
3595 txn = rbtxn_get_toptxn(txn);
3596
3597 Assert(nmsgs > 0);
3598
3600 {
3601 /*
3602 * Check the transaction has enough space for storing distributed
3603 * invalidation messages.
3604 */
3606 {
3607 /*
3608 * Mark the invalidation message as overflowed and free up the
3609 * messages accumulated so far.
3610 */
3612
3614 {
3618 }
3619 }
3620 else
3623 msgs, nmsgs);
3624 }
3625
3626 /* Queue the invalidation messages into the transaction */
3627 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3628
3629 MemoryContextSwitchTo(oldcontext);
3630}

References Assert, fb(), ReorderBufferTXN::invalidations_distributed, MAX_DISTR_INVAL_MSG_PER_TXN, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations_distributed, pfree(), RBTXN_DISTR_INVAL_OVERFLOWED, rbtxn_distr_inval_overflowed, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferAddInvalidations()

void ReorderBufferAddInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3538 of file reorderbuffer.c.

3541{
3542 ReorderBufferTXN *txn;
3543 MemoryContext oldcontext;
3544
3545 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3546
3547 oldcontext = MemoryContextSwitchTo(rb->context);
3548
3549 /*
3550 * Collect all the invalidations under the top transaction, if available,
3551 * so that we can execute them all together. See comments atop this
3552 * function.
3553 */
3554 txn = rbtxn_get_toptxn(txn);
3555
3556 Assert(nmsgs > 0);
3557
3559 &txn->ninvalidations,
3560 msgs, nmsgs);
3561
3562 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3563
3564 MemoryContextSwitchTo(oldcontext);
3565}

References Assert, fb(), ReorderBufferTXN::invalidations, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), and ReorderBufferTXNByXid().

Referenced by xact_decode().

◆ ReorderBufferAddNewCommandId()

◆ ReorderBufferAddNewTupleCids()

◆ ReorderBufferAddSnapshot()

◆ ReorderBufferAllocate()

ReorderBuffer * ReorderBufferAllocate ( void  )

Definition at line 325 of file reorderbuffer.c.

326{
327 ReorderBuffer *buffer;
330
332
333 /* allocate memory in own context, to have better accountability */
335 "ReorderBuffer",
337
338 buffer =
340
341 memset(&hash_ctl, 0, sizeof(hash_ctl));
342
343 buffer->context = new_ctx;
344
346 "Change",
348 sizeof(ReorderBufferChange));
349
351 "TXN",
353 sizeof(ReorderBufferTXN));
354
355 /*
356 * To minimize memory fragmentation caused by long-running transactions
357 * with changes spanning multiple memory blocks, we use a single
358 * fixed-size memory block for decoded tuple storage. The performance
359 * testing showed that the default memory block size maintains logical
360 * decoding performance without causing fragmentation due to concurrent
361 * transactions. One might think that we can use the max size as
362 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
363 * the memory fragmentation.
364 */
366 "Tuples",
370
371 hash_ctl.keysize = sizeof(TransactionId);
372 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
373 hash_ctl.hcxt = buffer->context;
374
375 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
377
379 buffer->by_txn_last_txn = NULL;
380
381 buffer->outbuf = NULL;
382 buffer->outbufsize = 0;
383 buffer->size = 0;
384
385 /* txn_heap is ordered by transaction size */
387
388 buffer->spillTxns = 0;
389 buffer->spillCount = 0;
390 buffer->spillBytes = 0;
391 buffer->streamTxns = 0;
392 buffer->streamCount = 0;
393 buffer->streamBytes = 0;
394 buffer->memExceededCount = 0;
395 buffer->totalTxns = 0;
396 buffer->totalBytes = 0;
397
399
400 dlist_init(&buffer->toplevel_by_lsn);
402 dclist_init(&buffer->catchange_txns);
403
404 /*
405 * Ensure there's no stale data from prior uses of this slot, in case some
406 * prior exit avoided calling ReorderBufferFree. Failure to do this can
407 * produce duplicated txns, and it's very cheap if there's nothing there.
408 */
410
411 return buffer;
412}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ReorderBuffer::by_txn, ReorderBuffer::by_txn_last_txn, ReorderBuffer::by_txn_last_xid, ReorderBuffer::catchange_txns, ReorderBuffer::change_context, ReorderBuffer::context, ReorderBuffer::current_restart_decoding_lsn, CurrentMemoryContext, ReplicationSlot::data, dclist_init(), dlist_init(), fb(), GenerationContextCreate(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, InvalidTransactionId, InvalidXLogRecPtr, ReorderBuffer::memExceededCount, MemoryContextAlloc(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, ReorderBuffer::outbuf, ReorderBuffer::outbufsize, pairingheap_allocate(), ReorderBufferCleanupSerializedTXNs(), ReorderBufferTXNSizeCompare(), ReorderBuffer::size, SLAB_DEFAULT_BLOCK_SIZE, SlabContextCreate(), ReorderBuffer::spillBytes, ReorderBuffer::spillCount, ReorderBuffer::spillTxns, ReorderBuffer::streamBytes, ReorderBuffer::streamCount, ReorderBuffer::streamTxns, ReorderBuffer::toplevel_by_lsn, ReorderBuffer::totalBytes, ReorderBuffer::totalTxns, ReorderBuffer::tup_context, ReorderBuffer::txn_context, ReorderBuffer::txn_heap, and ReorderBuffer::txns_by_base_snapshot_lsn.

Referenced by StartupDecodingContext().

◆ ReorderBufferAllocChange()

◆ ReorderBufferAllocRelids()

Oid * ReorderBufferAllocRelids ( ReorderBuffer rb,
int  nrelids 
)

Definition at line 626 of file reorderbuffer.c.

627{
628 Oid *relids;
630
631 alloc_len = sizeof(Oid) * nrelids;
632
633 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
634
635 return relids;
636}

References fb(), and MemoryContextAlloc().

Referenced by DecodeTruncate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTupleBuf()

HeapTuple ReorderBufferAllocTupleBuf ( ReorderBuffer rb,
Size  tuple_len 
)

Definition at line 593 of file reorderbuffer.c.

594{
595 HeapTuple tuple;
597
598 alloc_len = tuple_len + SizeofHeapTupleHeader;
599
600 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
602 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
603
604 return tuple;
605}

References fb(), HEAPTUPLESIZE, MemoryContextAlloc(), SizeofHeapTupleHeader, and HeapTupleData::t_data.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeUpdate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTXN()

static ReorderBufferTXN * ReorderBufferAllocTXN ( ReorderBuffer rb)
static

Definition at line 436 of file reorderbuffer.c.

437{
438 ReorderBufferTXN *txn;
439
440 txn = (ReorderBufferTXN *)
441 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
442
443 memset(txn, 0, sizeof(ReorderBufferTXN));
444
445 dlist_init(&txn->changes);
446 dlist_init(&txn->tuplecids);
447 dlist_init(&txn->subtxns);
448
449 /* InvalidCommandId is not zero, so set it explicitly */
452
453 return txn;
454}

References ReorderBufferTXN::changes, ReorderBufferTXN::command_id, dlist_init(), fb(), InvalidCommandId, MemoryContextAlloc(), ReorderBufferTXN::output_plugin_private, ReorderBufferTXN::subtxns, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferTXNByXid().

◆ ReorderBufferApplyChange()

static void ReorderBufferApplyChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
Relation  relation,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2073 of file reorderbuffer.c.

2076{
2077 if (streaming)
2078 rb->stream_change(rb, txn, relation, change);
2079 else
2080 rb->apply_change(rb, txn, relation, change);
2081}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyMessage()

static void ReorderBufferApplyMessage ( ReorderBuffer rb,
ReorderBufferTXN txn,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2101 of file reorderbuffer.c.

2103{
2104 if (streaming)
2105 rb->stream_message(rb, txn, change->lsn, true,
2106 change->data.msg.prefix,
2107 change->data.msg.message_size,
2108 change->data.msg.message);
2109 else
2110 rb->message(rb, txn, change->lsn, true,
2111 change->data.msg.prefix,
2112 change->data.msg.message_size,
2113 change->data.msg.message);
2114}

References ReorderBufferChange::data, fb(), ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, and ReorderBufferChange::prefix.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyTruncate()

static void ReorderBufferApplyTruncate ( ReorderBuffer rb,
ReorderBufferTXN txn,
int  nrelations,
Relation relations,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2087 of file reorderbuffer.c.

2090{
2091 if (streaming)
2092 rb->stream_truncate(rb, txn, nrelations, relations, change);
2093 else
2094 rb->apply_truncate(rb, txn, nrelations, relations, change);
2095}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferAssignChild()

void ReorderBufferAssignChild ( ReorderBuffer * rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  lsn 
)

Definition at line 1100 of file reorderbuffer.c.

1102{
1103 ReorderBufferTXN *txn;
1104 ReorderBufferTXN *subtxn;
1105 bool new_top;
1106 bool new_sub;
1107
1108 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1109 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1110
1111 if (!new_sub)
1112 {
1113 if (rbtxn_is_known_subxact(subtxn))
1114 {
1115 /* already associated, nothing to do */
1116 return;
1117 }
1118 else
1119 {
1120 /*
1121 * We already saw this transaction, but initially added it to the
1122 * list of top-level txns. Now that we know it's not top-level,
1123 * remove it from there.
1124 */
1125 dlist_delete(&subtxn->node);
1126 }
1127 }
1128
1129 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1130 subtxn->toplevel_xid = xid;
1131 Assert(subtxn->nsubtxns == 0);
1132
1133 /* set the reference to top-level transaction */
1134 subtxn->toptxn = txn;
1135
1136 /* add to subtransaction list */
1137 dlist_push_tail(&txn->subtxns, &subtxn->node);
1138 txn->nsubtxns++;
1139
1140 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1141 ReorderBufferTransferSnapToParent(txn, subtxn);
1142
1143 /* Verify LSN-ordering invariant */
1144 AssertTXNLsnOrder(rb);
1145}

References Assert, AssertTXNLsnOrder(), dlist_delete(), dlist_push_tail(), fb(), ReorderBufferTXN::nsubtxns, rbtxn_is_known_subxact, RBTXN_IS_SUBXACT, ReorderBufferTransferSnapToParent(), ReorderBufferTXNByXid(), and ReorderBufferTXN::subtxns.

Referenced by LogicalDecodingProcessRecord(), and ReorderBufferCommitChild().

◆ ReorderBufferBuildTupleCidHash()

static void ReorderBufferBuildTupleCidHash ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 1837 of file reorderbuffer.c.

1838{
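1839 dlist_iter iter;
1840 HASHCTL hash_ctl;
1841
1842 if (!rbtxn_has_catalog_changes(txn) || dlist_is_empty(&txn->tuplecids))
1843 return;
1844
1845 hash_ctl.keysize = sizeof(ReorderBufferTupleCidKey);
1846 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);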
1847 hash_ctl.hcxt = rb->context;
1848
1849 /*
1850 * create the hash with the exact number of to-be-stored tuplecids from
1851 * the start
1852 */
1853 txn->tuplecid_hash =
1854 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1855 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1856
1857 dlist_foreach(iter, &txn->tuplecids)
1858 {
1859 ReorderBufferTupleCidKey key;
1860 ReorderBufferTupleCidEnt *ent;
1861 bool found;
1862 ReorderBufferChange *change;
1863
1864 change = dlist_container(ReorderBufferChange, node, iter.cur);
1865
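1866 Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
1867
1868 /* be careful about padding */
1869 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1870
1871 key.rlocator = change->data.tuplecid.locator;
1872
1873 ItemPointerCopy(&change->data.tuplecid.tid,
1874 &key.tid);
1875
1876 ent = (ReorderBufferTupleCidEnt *)
1877 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);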
1878 if (!found)
1879 {
1880 ent->cmin = change->data.tuplecid.cmin;
1881 ent->cmax = change->data.tuplecid.cmax;
1882 ent->combocid = change->data.tuplecid.combocid;
1883 }
1884 else
1885 {
1886 /*
1887 * Maybe we already saw this tuple before in this transaction, but
1888 * if so it must have the same cmin.
1889 */
1890 Assert(ent->cmin == change->data.tuplecid.cmin);
1891
1892 /*
1893 * cmax may be initially invalid, but once set it can only grow,
1894 * and never become invalid again.
1895 */
1896 Assert((ent->cmax == InvalidCommandId) ||
1897 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1898 (change->data.tuplecid.cmax > ent->cmax)));
1899 ent->cmax = change->data.tuplecid.cmax;
1900 }
1901 }
1902}

References ReorderBufferChange::action, Assert, ReorderBufferChange::cmax, ReorderBufferChange::cmin, ReorderBufferChange::combocid, dlist_iter::cur, ReorderBufferChange::data, dlist_container, dlist_foreach, dlist_is_empty(), fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), InvalidCommandId, ItemPointerCopy(), HASHCTL::keysize, ReorderBufferChange::locator, ReorderBufferTXN::ntuplecids, rbtxn_has_catalog_changes, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChange::tid, ReorderBufferChange::tuplecid, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferCanStartStreaming()

static bool ReorderBufferCanStartStreaming ( ReorderBuffer * rb)
inline static

Definition at line 4312 of file reorderbuffer.c.

4313{
4314 LogicalDecodingContext *ctx = rb->private_data;
4315 SnapBuild *builder = ctx->snapshot_builder;
4316
4317 /* We can't start streaming unless a consistent state is reached. */
4318 if (SnapBuildCurrentState(builder) < SNAPBUILD_CONSISTENT)
4319 return false;
4320
4321 /*
4322 * We can't start streaming immediately even if the streaming is enabled
4323 * because we previously decoded this transaction and now just are
4324 * restarting.
4325 */
4326 if (ReorderBufferCanStream(rb) &&
4327 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4328 return true;
4329
4330 return false;
4331}

References fb(), LogicalDecodingContext::reader, XLogReaderState::ReadRecPtr, ReorderBufferCanStream(), SNAPBUILD_CONSISTENT, SnapBuildCurrentState(), SnapBuildXactNeedsSkip(), and LogicalDecodingContext::snapshot_builder.

Referenced by ReorderBufferCheckMemoryLimit(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferCanStream()

static bool ReorderBufferCanStream ( ReorderBuffer * rb)
inline static

Definition at line 4303 of file reorderbuffer.c.

4304{
4305 LogicalDecodingContext *ctx = rb->private_data;
4306
4307 return ctx->streaming;
4308}

References fb(), and LogicalDecodingContext::streaming.

Referenced by ReorderBufferCanStartStreaming(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferChangeMemoryUpdate()

static void ReorderBufferChangeMemoryUpdate ( ReorderBuffer * rb,
ReorderBufferChange * change,
ReorderBufferTXN * txn,
bool  addition,
Size  sz 
)
static

Definition at line 3381 of file reorderbuffer.c.

3385{
3386 ReorderBufferTXN *toptxn;
3387
3388 Assert(txn || change);
3389
3390 /*
3391 * Ignore tuple CID changes, because those are not evicted when reaching
3392 * memory limit. So we just don't count them, because it might easily
3393 * trigger a pointless attempt to spill.
3394 */
3395 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3396 return;
3397
3398 if (sz == 0)
3399 return;
3400
3401 if (txn == NULL)
3402 txn = change->txn;
3403 Assert(txn != NULL);
3404
3405 /*
3406 * Update the total size in top level as well. This is later used to
3407 * compute the decoding stats.
3408 */
3409 toptxn = rbtxn_get_toptxn(txn);
3410
3411 if (addition)
3412 {
3413 Size oldsize = txn->size;
3414
3415 txn->size += sz;
3416 rb->size += sz;
3417
3418 /* Update the total size in the top transaction. */
3419 toptxn->total_size += sz;
3420
3421 /* Update the max-heap */
3422 if (oldsize != 0)
3423 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3424 pairingheap_add(rb->txn_heap, &txn->txn_node);
3425 }
3426 else
3427 {
3428 Assert((rb->size >= sz) && (txn->size >= sz));
3429 txn->size -= sz;
3430 rb->size -= sz;
3431
3432 /* Update the total size in the top transaction. */
3433 toptxn->total_size -= sz;
3434
3435 /* Update the max-heap */
3436 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3437 if (txn->size != 0)
3438 pairingheap_add(rb->txn_heap, &txn->txn_node);
3439 }
3440
3441 Assert(txn->size <= rb->size);
3442}

References ReorderBufferChange::action, Assert, fb(), pairingheap_add(), pairingheap_remove(), rbtxn_get_toptxn, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferTXN::size, ReorderBufferTXN::total_size, ReorderBufferChange::txn, and ReorderBufferTXN::txn_node.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferSerializeTXN(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferChangeSize()

static Size ReorderBufferChangeSize ( ReorderBufferChange * change)
static

Definition at line 4455 of file reorderbuffer.c.

4456{
4457 Size sz = sizeof(ReorderBufferChange);
4458
4459 switch (change->action)
4460 {
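4461 /* fall through these, they're all similar enough */
4462 case REORDER_BUFFER_CHANGE_INSERT:
4463 case REORDER_BUFFER_CHANGE_UPDATE:
4464 case REORDER_BUFFER_CHANGE_DELETE:
4465 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
4466 {
4467 HeapTuple oldtup,
4468 newtup;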
4469 Size oldlen = 0;
4470 Size newlen = 0;
4471
4472 oldtup = change->data.tp.oldtuple;
4473 newtup = change->data.tp.newtuple;
4474
4475 if (oldtup)
4476 {
4477 sz += sizeof(HeapTupleData);
4478 oldlen = oldtup->t_len;
4479 sz += oldlen;
4480 }
4481
4482 if (newtup)
4483 {
4484 sz += sizeof(HeapTupleData);
4485 newlen = newtup->t_len;
4486 sz += newlen;
4487 }
4488
4489 break;
4490 }
4491 case REORDER_BUFFER_CHANGE_MESSAGE:
4492 {
4493 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4494
4495 sz += prefix_size + change->data.msg.message_size +
4496 sizeof(Size) + sizeof(Size);
4497
4498 break;
4499 }
4500 case REORDER_BUFFER_CHANGE_INVALIDATION:
4501 {
4502 sz += sizeof(SharedInvalidationMessage) *
4503 change->data.inval.ninvalidations;
4504 break;
4505 }
4506 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
4507 {
4508 Snapshot snap;
4509
4510 snap = change->data.snapshot;
4511
4512 sz += sizeof(SnapshotData) +
4513 sizeof(TransactionId) * snap->xcnt +
4514 sizeof(TransactionId) * snap->subxcnt;
4515
4516 break;
4517 }
4518 case REORDER_BUFFER_CHANGE_TRUNCATE:
4519 {
4520 sz += sizeof(Oid) * change->data.truncate.nrelids;
4521
4522 break;
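4523 }
4524 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
4525 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
4526 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
4527 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
4528 /* ReorderBufferChange contains everything important */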
4529 break;
4530 }
4531
4532 return sz;
4533}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferCheckAndTruncateAbortedTXN()

static bool ReorderBufferCheckAndTruncateAbortedTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 1775 of file reorderbuffer.c.

1776{
1777 /* Quick return for regression tests */
1778 if (unlikely(debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE))
1779 return false;
1780
1781 /*
1782 * Quick return if the transaction status is already known.
1783 */
1784
1785 if (rbtxn_is_committed(txn))
1786 return false;
1787 if (rbtxn_is_aborted(txn))
1788 {
1789 /* Already-aborted transactions should not have any changes */
1790 Assert(txn->size == 0);
1791
1792 return true;
1793 }
1794
1795 /* Otherwise, check the transaction status using CLOG lookup */
1796
1797 if (TransactionIdIsInProgress(txn->xid))
1798 return false;
1799
1800 if (TransactionIdDidCommit(txn->xid))
1801 {
1802 /*
1803 * Remember the transaction is committed so that we can skip CLOG
1804 * check next time, avoiding the pressure on CLOG lookup.
1805 */
1806 Assert(!rbtxn_is_aborted(txn));
1807 txn->txn_flags |= RBTXN_IS_COMMITTED;
1808 return false;
1809 }
1810
1811 /*
1812 * The transaction aborted. We discard both the changes collected so far
1813 * and the toast reconstruction data. The full cleanup will happen as part
1814 * of decoding ABORT record of this transaction.
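1815 */
1816 ReorderBufferTruncateTXN(rb, txn, rbtxn_is_prepared(txn));
1817 ReorderBufferToastReset(rb, txn);
1818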
1819 /* All changes should be discarded */
1820 Assert(txn->size == 0);
1821
1822 /*
1823 * Mark the transaction as aborted so we can ignore future changes of this
1824 * transaction.
1825 */
1826 Assert(!rbtxn_is_committed(txn));
1827 txn->txn_flags |= RBTXN_IS_ABORTED;
1828
1829 return true;
1830}

References Assert, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), RBTXN_IS_ABORTED, rbtxn_is_aborted, RBTXN_IS_COMMITTED, rbtxn_is_committed, rbtxn_is_prepared, ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferTXN::size, TransactionIdDidCommit(), TransactionIdIsInProgress(), ReorderBufferTXN::txn_flags, unlikely, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferCheckMemoryLimit()

static void ReorderBufferCheckMemoryLimit ( ReorderBuffer * rb)
static

Definition at line 3892 of file reorderbuffer.c.

3893{
3894 ReorderBufferTXN *txn;
3895 bool update_stats = true;
3896
3897 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3898 {
3899 /*
3900 * Update the statistics as the memory usage has reached the limit. We
3901 * report the statistics update later in this function since we can
3902 * update the slot statistics altogether while streaming or
3903 * serializing transactions in most cases.
3904 */
3905 rb->memExceededCount += 1;
3906 }
3907 else if (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_BUFFERED)
3908 {
3909 /*
3910 * Bail out if debug_logical_replication_streaming is buffered and we
3911 * haven't exceeded the memory limit.
3912 */
3913 return;
3914 }
3915
3916 /*
3917 * If debug_logical_replication_streaming is immediate, loop until there's
3918 * no change. Otherwise, loop until we reach under the memory limit. One
3919 * might think that just by evicting the largest (sub)transaction we will
3920 * come under the memory limit based on assumption that the selected
3921 * transaction is at least as large as the most recent change (which
3922 * caused us to go over the memory limit). However, that is not true
3923 * because a user can reduce the logical_decoding_work_mem to a smaller
3924 * value before the most recent change.
3925 */
3926 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3927 (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE &&
3928 rb->size > 0))
3929 {
3930 /*
3931 * Pick the largest non-aborted transaction and evict it from memory
3932 * by streaming, if possible. Otherwise, spill to disk.
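3933 */
3934 if (ReorderBufferCanStartStreaming(rb) &&
3935 (txn = ReorderBufferLargestStreamableTopTXN(rb)) != NULL)
3936 {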
3937 /* we know there has to be one, because the size is not zero */
3938 Assert(txn && rbtxn_is_toptxn(txn));
3939 Assert(txn->total_size > 0);
3940 Assert(rb->size >= txn->total_size);
3941
3942 /* skip the transaction if aborted */
3943 if (ReorderBufferCheckAndTruncateAbortedTXN(rb, txn))
3944 continue;
3945
3946 ReorderBufferStreamTXN(rb, txn);
3947 }
3948 else
3949 {
3950 /*
3951 * Pick the largest transaction (or subtransaction) and evict it
3952 * from memory by serializing it to disk.
3953 */
3954 txn = ReorderBufferLargestTXN(rb);
3955
3956 /* we know there has to be one, because the size is not zero */
3957 Assert(txn);
3958 Assert(txn->size > 0);
3959 Assert(rb->size >= txn->size);
3960
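3961 /* skip the transaction if aborted */
3962 if (ReorderBufferCheckAndTruncateAbortedTXN(rb, txn))
3963 continue;
3964
3965 ReorderBufferSerializeTXN(rb, txn);
3966 }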
3967
3968 /*
3969 * After eviction, the transaction should have no entries in memory,
3970 * and should use 0 bytes for changes.
3971 */
3972 Assert(txn->size == 0);
3973 Assert(txn->nentries_mem == 0);
3974
3975 /*
3976 * We've reported the memExceededCount update while streaming or
3977 * serializing the transaction.
3978 */
3979 update_stats = false;
3980 }
3981
3982 if (update_stats)
3983 UpdateDecodingStats((LogicalDecodingContext *) rb->private_data);
3984
3985 /* We must be under the memory limit now. */
3986 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3987}

References Assert, DEBUG_LOGICAL_REP_STREAMING_BUFFERED, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), logical_decoding_work_mem, ReorderBufferTXN::nentries_mem, rbtxn_is_toptxn, ReorderBufferCanStartStreaming(), ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferLargestStreamableTopTXN(), ReorderBufferLargestTXN(), ReorderBufferSerializeTXN(), ReorderBufferStreamTXN(), ReorderBufferTXN::size, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferCleanupSerializedTXNs()

static void ReorderBufferCleanupSerializedTXNs ( const char * slotname)
static

Definition at line 4880 of file reorderbuffer.c.

4881{
4882 DIR *spill_dir;
4883 struct dirent *spill_de;
4884 struct stat statbuf;
4885 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4886
4887 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4888
4889 /* we're only handling directories here, skip if it's not ours */
4890 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4891 return;
4892
4893 spill_dir = AllocateDir(path);
4894 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4895 {
4896 /* only look at names that can be ours */
4897 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4898 {
4899 snprintf(path, sizeof(path),
4900 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4901 spill_de->d_name);
4902
4903 if (unlink(path) != 0)
4904 ereport(ERROR,
4905 (errcode_for_file_access(),
4906 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4907 path, PG_REPLSLOT_DIR, slotname)));
4908 }
4909 }
4910 FreeDir(spill_dir);
4911}

References AllocateDir(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), FreeDir(), INFO, lstat, MAXPGPATH, PG_REPLSLOT_DIR, ReadDirExtended(), S_ISDIR, snprintf, and sprintf.

Referenced by ReorderBufferAllocate(), ReorderBufferFree(), and StartupReorderBuffer().

◆ ReorderBufferCleanupTXN()

static void ReorderBufferCleanupTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 1536 of file reorderbuffer.c.

1537{
1538 bool found;
1539 dlist_mutable_iter iter;
1540 Size mem_freed = 0;
1541
1542 /* cleanup subtransactions & their changes */
1543 dlist_foreach_modify(iter, &txn->subtxns)
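1544 {
1545 ReorderBufferTXN *subtxn;
1546
1547 subtxn = dlist_container(ReorderBufferTXN, node, iter.cur);
1548
1549 /*
1550 * Subtransactions are always associated to the toplevel TXN, even if
1551 * they originally were happening inside another subtxn, so we won't
1552 * ever recurse more than one level deep here.
1553 */
1554 Assert(rbtxn_is_known_subxact(subtxn));
1555 Assert(subtxn->nsubtxns == 0);
1556
1557 ReorderBufferCleanupTXN(rb, subtxn);
1558 }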
1559
1560 /* cleanup changes in the txn */
1561 dlist_foreach_modify(iter, &txn->changes)
1562 {
1563 ReorderBufferChange *change;
1564
1565 change = dlist_container(ReorderBufferChange, node, iter.cur);
1566
1567 /* Check we're not mixing changes from different transactions. */
1568 Assert(change->txn == txn);
1569
1570 /*
1571 * Instead of updating the memory counter for individual changes, we
1572 * sum up the size of memory to free so we can update the memory
1573 * counter all together below. This saves costs of maintaining the
1574 * max-heap.
1575 */
1576 mem_freed += ReorderBufferChangeSize(change);
1577
1578 ReorderBufferFreeChange(rb, change, false);
1579 }
1580
1581 /* Update the memory counter */
1582 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, mem_freed);
1583
1584 /*
1585 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1586 * They are always stored in the toplevel transaction.
1587 */
1588 dlist_foreach_modify(iter, &txn->tuplecids)
1589 {
1590 ReorderBufferChange *change;
1591
1592 change = dlist_container(ReorderBufferChange, node, iter.cur);
1593
1594 /* Check we're not mixing changes from different transactions. */
1595 Assert(change->txn == txn);
1596 Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
1597
1598 ReorderBufferFreeChange(rb, change, true);
1599 }
1600
1601 /*
1602 * Cleanup the base snapshot, if set.
1603 */
1604 if (txn->base_snapshot != NULL)
1605 {
1606 SnapBuildSnapDecRefcount(txn->base_snapshot);
1607 dlist_delete(&txn->base_snapshot_node);
1608 }
1609
1610 /*
1611 * Cleanup the snapshot for the last streamed run.
1612 */
1613 if (txn->snapshot_now != NULL)
1614 {
1615 Assert(rbtxn_is_streamed(txn));
1616 ReorderBufferFreeSnap(rb, txn->snapshot_now);
1617 }
1618
1619 /*
1620 * Remove TXN from its containing lists.
1621 *
1622 * Note: if txn is known as subxact, we are deleting the TXN from its
1623 * parent's list of known subxacts; this leaves the parent's nsubxacts
1624 * count too high, but we don't care. Otherwise, we are deleting the TXN
1625 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1626 * list of catalog modifying transactions as well.
1627 */
1628 dlist_delete(&txn->node);
1629 if (rbtxn_has_catalog_changes(txn))
1630 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1631
1632 /* now remove reference from buffer */
1633 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1634 Assert(found);
1635
1636 /* remove entries spilled to disk */
1637 if (rbtxn_is_serialized(txn))
1638 ReorderBufferRestoreCleanup(rb, txn);
1639
1640 /* deallocate */
1641 ReorderBufferFreeTXN(rb, txn);
1642}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_node, ReorderBufferTXN::catchange_node, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dclist_delete_from(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search(), ReorderBufferTXN::node, rbtxn_has_catalog_changes, rbtxn_is_known_subxact, rbtxn_is_serialized, rbtxn_is_streamed, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferFreeTXN(), ReorderBufferRestoreCleanup(), SnapBuildSnapDecRefcount(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::xid.

Referenced by ReorderBufferAbort(), ReorderBufferAbortOld(), ReorderBufferCleanupTXN(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferProcessTXN(), ReorderBufferReplay(), and ReorderBufferStreamCommit().

◆ ReorderBufferCommit()

void ReorderBufferCommit ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2880 of file reorderbuffer.c.

2884{
2885 ReorderBufferTXN *txn;
2886
2887 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2888 false);
2889
2890 /* unknown transaction, nothing to replay */
2891 if (txn == NULL)
2892 return;
2893
2894 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2895 origin_id, origin_lsn);
2896}

References fb(), InvalidXLogRecPtr, ReorderBufferReplay(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferCommitChild()

void ReorderBufferCommitChild ( ReorderBuffer * rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn 
)

Definition at line 1220 of file reorderbuffer.c.

1223{
1224 ReorderBufferTXN *subtxn;
1225
1226 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1227 InvalidXLogRecPtr, false);
1228
1229 /*
1230 * No need to do anything if that subtxn didn't contain any changes
1231 */
1232 if (!subtxn)
1233 return;
1234
1235 subtxn->final_lsn = commit_lsn;
1236 subtxn->end_lsn = end_lsn;
1237
1238 /*
1239 * Assign this subxact as a child of the toplevel xact (no-op if already
1240 * done.)
1241 */
1242 ReorderBufferAssignChild(rb, xid, subxid, InvalidXLogRecPtr);
1243}

References fb(), InvalidXLogRecPtr, ReorderBufferAssignChild(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit(), and DecodePrepare().

◆ ReorderBufferCopySnap()

static Snapshot ReorderBufferCopySnap ( ReorderBuffer * rb,
Snapshot  orig_snap,
ReorderBufferTXN * txn,
CommandId  cid 
)
static

Definition at line 1910 of file reorderbuffer.c.

1912{
1913 Snapshot snap;
1914 dlist_iter iter;
1915 int i = 0;
1916 Size size;
1917
1918 size = sizeof(SnapshotData) +
1919 sizeof(TransactionId) * orig_snap->xcnt +
1920 sizeof(TransactionId) * (txn->nsubtxns + 1);
1921
1922 snap = MemoryContextAllocZero(rb->context, size);
1923 memcpy(snap, orig_snap, sizeof(SnapshotData));
1924
1925 snap->copied = true;
1926 snap->active_count = 1; /* mark as active so nobody frees it */
1927 snap->regd_count = 0;
1928 snap->xip = (TransactionId *) (snap + 1);
1929
1930 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1931
1932 /*
1933 * snap->subxip contains all txids that belong to our transaction which we
1934 * need to check via cmin/cmax. That's why we store the toplevel
1935 * transaction in there as well.
1936 */
1937 snap->subxip = snap->xip + snap->xcnt;
1938 snap->subxip[i++] = txn->xid;
1939
1940 /*
1941 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1942 * manually. Since it's an upper boundary it is safe to use it for the
1943 * allocation above.
1944 */
1945 snap->subxcnt = 1;
1946
1947 dlist_foreach(iter, &txn->subtxns)
1948 {
1949 ReorderBufferTXN *sub_txn;
1950
1951 sub_txn = dlist_container(ReorderBufferTXN, node, iter.cur);
1952 snap->subxip[i++] = sub_txn->xid;
1953 snap->subxcnt++;
1954 }
1955
1956 /* sort so we can bsearch() later */
1957 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1958
1959 /* store the specified current CommandId */
1960 snap->curcid = cid;
1961
1962 return snap;
1963}

References dlist_iter::cur, dlist_container, dlist_foreach, fb(), i, memcpy(), MemoryContextAllocZero(), ReorderBufferTXN::nsubtxns, qsort, ReorderBufferTXN::subtxns, ReorderBufferTXN::xid, and xidComparator().

Referenced by ReorderBufferProcessTXN(), ReorderBufferSaveTXNSnapshot(), and ReorderBufferStreamTXN().

◆ ReorderBufferExecuteInvalidations()

static void ReorderBufferExecuteInvalidations ( uint32  nmsgs,
SharedInvalidationMessage * msgs 
)
static

Definition at line 3637 of file reorderbuffer.c.

3638{
3639 int i;
3640
3641 for (i = 0; i < nmsgs; i++)
3642 LocalExecuteInvalidationMessage(&msgs[i]);
3643}

References i, and LocalExecuteInvalidationMessage().

Referenced by ReorderBufferFinishPrepared(), and ReorderBufferProcessTXN().

◆ ReorderBufferFinishPrepared()

void ReorderBufferFinishPrepared ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
XLogRecPtr  two_phase_at,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn,
char * gid,
bool  is_commit 
)

Definition at line 2997 of file reorderbuffer.c.

3002{
3003 ReorderBufferTXN *txn;
3004 XLogRecPtr prepare_end_lsn;
3005 TimestampTz prepare_time;
3006
3007 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3008
3009 /* unknown transaction, nothing to do */
3010 if (txn == NULL)
3011 return;
3012
3013 /*
3014 * By this time the txn has the prepare record information, remember it to
3015 * be later used for rollback.
3016 */
3017 prepare_end_lsn = txn->end_lsn;
3018 prepare_time = txn->prepare_time;
3019
3020 /* add the gid in the txn */
3021 txn->gid = pstrdup(gid);
3022
3023 /*
3024 * It is possible that this transaction is not decoded at prepare time
3025 * either because by that time we didn't have a consistent snapshot, or
3026 * two_phase was not enabled, or it was decoded earlier but we have
3027 * restarted. We only need to send the prepare if it was not decoded
3028 * earlier. We don't need to decode the xact for aborts if it is not done
3029 * already.
3030 */
3031 if ((txn->final_lsn < two_phase_at) && is_commit)
3032 {
3033 /*
3034 * txn must have been marked as a prepared transaction and skipped but
3035 * not sent a prepare. Also, the prepare info must have been updated
3036 * in txn even if we skip prepare.
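3037 */
3038 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) ==
3039 (RBTXN_IS_PREPARED | RBTXN_SKIPPED_PREPARE));
3040 Assert(XLogRecPtrIsValid(txn->final_lsn));
3041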
3042 /*
3043 * By this time the txn has the prepare record information and it is
3044 * important to use that so that downstream gets the accurate
3045 * information. If instead, we have passed commit information here
3046 * then downstream can behave as it has already replayed commit
3047 * prepared after the restart.
3048 */
3049 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3050 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3051 }
3052
3053 txn->final_lsn = commit_lsn;
3054 txn->end_lsn = end_lsn;
3055 txn->commit_time = commit_time;
3056 txn->origin_id = origin_id;
3057 txn->origin_lsn = origin_lsn;
3058
3059 if (is_commit)
3060 rb->commit_prepared(rb, txn, commit_lsn);
3061 else
3062 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3063
3064 /* cleanup: make sure there's no cache pollution */
3065 ReorderBufferExecuteInvalidations(txn->ninvalidations,
3066 txn->invalidations);
3067 ReorderBufferCleanupTXN(rb, txn);
3068}

References Assert, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, ReorderBufferTXN::invalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferCleanupTXN(), ReorderBufferExecuteInvalidations(), ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeAbort(), and DecodeCommit().

◆ ReorderBufferForget()

void ReorderBufferForget ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3176 of file reorderbuffer.c.

3177{
3178 ReorderBufferTXN *txn;
3179
3180 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3181 false);
3182
3183 /* unknown, nothing to forget */
3184 if (txn == NULL)
3185 return;
3186
3187 /* this transaction mustn't be streamed */
3188 Assert(!rbtxn_is_streamed(txn));
3189
3190 /* cosmetic... */
3191 txn->final_lsn = lsn;
3192
3193 /*
3194 * Process only cache invalidation messages in this transaction if there
3195 * are any. Even if we're not interested in the transaction's contents, it
3196 * could have manipulated the catalog and we need to update the caches
3197 * according to that.
3198 */
3199 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3200 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3201 txn->invalidations);
3202 else
3203 Assert(txn->ninvalidations == 0);
3204
3205 /* remove potential on-disk data, and deallocate */
3206 ReorderBufferCleanupTXN(rb, txn);
3207}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferFree()

void ReorderBufferFree ( ReorderBuffer * rb)

Definition at line 418 of file reorderbuffer.c.

419{
420 MemoryContext context = rb->context;
421
422 /*
423 * We free separately allocated data by entirely scrapping reorderbuffer's
424 * memory context.
425 */
426 MemoryContextDelete(context);
427
428 /* Free disk space used by unconsumed reorder buffers */
429 ReorderBufferCleanupSerializedTXNs(NameStr(MyReplicationSlot->data.name));
430}

References ReplicationSlot::data, fb(), MemoryContextDelete(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, and ReorderBufferCleanupSerializedTXNs().

Referenced by FreeDecodingContext().

◆ ReorderBufferFreeChange()

void ReorderBufferFreeChange ( ReorderBuffer * rb,
ReorderBufferChange * change,
bool  upd_mem 
)

Definition at line 523 of file reorderbuffer.c.

525{
526 /* update memory accounting info */
527 if (upd_mem)
528 ReorderBufferChangeMemoryUpdate(rb, change, NULL, false,
529 ReorderBufferChangeSize(change));
530
531 /* free contained data */
532 switch (change->action)
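533 {
534 case REORDER_BUFFER_CHANGE_INSERT:
535 case REORDER_BUFFER_CHANGE_UPDATE:
536 case REORDER_BUFFER_CHANGE_DELETE:
537 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
538 if (change->data.tp.newtuple)
539 {
540 ReorderBufferFreeTupleBuf(change->data.tp.newtuple);
541 change->data.tp.newtuple = NULL;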
542 }
543
544 if (change->data.tp.oldtuple)
545 {
546 ReorderBufferFreeTupleBuf(change->data.tp.oldtuple);
547 change->data.tp.oldtuple = NULL;
548 }
549 break;
550 case REORDER_BUFFER_CHANGE_MESSAGE:
551 if (change->data.msg.prefix != NULL)
552 pfree(change->data.msg.prefix);
553 change->data.msg.prefix = NULL;
554 if (change->data.msg.message != NULL)
555 pfree(change->data.msg.message);
556 change->data.msg.message = NULL;
557 break;
558 case REORDER_BUFFER_CHANGE_INVALIDATION:
559 if (change->data.inval.invalidations)
560 pfree(change->data.inval.invalidations);
561 change->data.inval.invalidations = NULL;
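562 break;
563 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
564 if (change->data.snapshot)
565 {
566 ReorderBufferFreeSnap(rb, change->data.snapshot);
567 change->data.snapshot = NULL;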
568 }
569 break;
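570 /* no data in addition to the struct itself */
571 case REORDER_BUFFER_CHANGE_TRUNCATE:
572 if (change->data.truncate.relids != NULL)
573 {
574 ReorderBufferFreeRelids(rb, change->data.truncate.relids);
575 change->data.truncate.relids = NULL;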
576 }
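577 break;
578 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
579 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
580 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
581 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
582 break;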
583 }
584
585 pfree(change);
586}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::message, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::oldtuple, pfree(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeRelids(), ReorderBufferFreeSnap(), ReorderBufferFreeTupleBuf(), ReorderBufferChange::snapshot, ReorderBufferChange::tp, and ReorderBufferChange::truncate.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNNext(), ReorderBufferProcessTXN(), ReorderBufferQueueChange(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), ReorderBufferToastReset(), and ReorderBufferTruncateTXN().

◆ ReorderBufferFreeRelids()

void ReorderBufferFreeRelids ( ReorderBuffer * rb,
Oid * relids 
)

Definition at line 642 of file reorderbuffer.c.

643{
644 pfree(relids);
645}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeSnap()

static void ReorderBufferFreeSnap ( ReorderBuffer * rb,
Snapshot  snap 
)
static

Definition at line 1969 of file reorderbuffer.c.

1970{
1971 if (snap->copied)
1972 pfree(snap);
1973 else
1974 SnapBuildSnapDecRefcount(snap);
1975}

References fb(), pfree(), and SnapBuildSnapDecRefcount().

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferProcessTXN(), and ReorderBufferStreamTXN().

◆ ReorderBufferFreeTupleBuf()

void ReorderBufferFreeTupleBuf ( HeapTuple  tuple)

Definition at line 611 of file reorderbuffer.c.

612{
613 pfree(tuple);
614}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeTXN()

static void ReorderBufferFreeTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 460 of file reorderbuffer.c.

461{
462 /* clean the lookup cache if we were cached (quite likely) */
463 if (rb->by_txn_last_xid == txn->xid)
464 {
465 rb->by_txn_last_xid = InvalidTransactionId;
466 rb->by_txn_last_txn = NULL;
467 }
468
469 /* free data that's contained */
470
471 if (txn->gid != NULL)
472 {
473 pfree(txn->gid);
474 txn->gid = NULL;
475 }
476
477 if (txn->tuplecid_hash != NULL)
478 {
479 hash_destroy(txn->tuplecid_hash);
480 txn->tuplecid_hash = NULL;
481 }
482
483 if (txn->invalidations)
484 {
485 pfree(txn->invalidations);
486 txn->invalidations = NULL;
487 }
488
489 if (txn->invalidations_distributed)
490 {
491 pfree(txn->invalidations_distributed);
492 txn->invalidations_distributed = NULL;
493 }
494
495 /* Reset the toast hash */
496 ReorderBufferToastReset(rb, txn);
497
498 /* All changes must be deallocated */
499 Assert(txn->size == 0);
500
501 pfree(txn);
502}

References Assert, fb(), ReorderBufferTXN::gid, hash_destroy(), ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidTransactionId, pfree(), ReorderBufferToastReset(), ReorderBufferTXN::size, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCleanupTXN().

◆ ReorderBufferGetCatalogChangesXacts()

TransactionId * ReorderBufferGetCatalogChangesXacts ( ReorderBuffer * rb)

Definition at line 3687 of file reorderbuffer.c.

3688{
3689 dlist_iter iter;
3690 TransactionId *xids = NULL;
3691 size_t xcnt = 0;
3692
3693 /* Quick return if the list is empty */
3694 if (dclist_count(&rb->catchange_txns) == 0)
3695 return NULL;
3696
3697 /* Initialize XID array */
3698 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3699 dclist_foreach(iter, &rb->catchange_txns)
3700 {
3701 ReorderBufferTXN *txn = dclist_container(ReorderBufferTXN,
3702 catchange_node,
3703 iter.cur);
3704
3705 Assert(rbtxn_has_catalog_changes(txn));
3706
3707 xids[xcnt++] = txn->xid;
3708 }
3709
3710 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3711
3712 Assert(xcnt == dclist_count(&rb->catchange_txns));
3713 return xids;
3714}

References Assert, dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, fb(), palloc_array, qsort, rbtxn_has_catalog_changes, ReorderBufferTXN::xid, and xidComparator().

Referenced by SnapBuildSerialize().

◆ ReorderBufferGetInvalidations()

uint32 ReorderBufferGetInvalidations ( ReorderBuffer * rb,
TransactionId  xid,
SharedInvalidationMessage **  msgs 
)

Definition at line 5627 of file reorderbuffer.c.

5629{
5630 ReorderBufferTXN *txn;
5631
5632 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5633 false);
5634
5635 if (txn == NULL)
5636 return 0;
5637
5638 *msgs = txn->invalidations;
5639
5640 return txn->ninvalidations;
5641}

References fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, and ReorderBufferTXNByXid().

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferGetOldestTXN()

ReorderBufferTXN * ReorderBufferGetOldestTXN ( ReorderBuffer * rb)

Definition at line 1045 of file reorderbuffer.c.

1046{
1047 ReorderBufferTXN *txn;
1048
1049 AssertTXNLsnOrder(rb);
1050
1051 if (dlist_is_empty(&rb->toplevel_by_lsn))
1052 return NULL;
1053
1054 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1055
1056 Assert(!rbtxn_is_known_subxact(txn));
1057 Assert(XLogRecPtrIsValid(txn->first_lsn));
1058 return txn;
1059}

References Assert, AssertTXNLsnOrder(), dlist_head_element, dlist_is_empty(), fb(), ReorderBufferTXN::first_lsn, rbtxn_is_known_subxact, and XLogRecPtrIsValid.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferGetOldestXmin()

TransactionId ReorderBufferGetOldestXmin ( ReorderBuffer * rb)

Definition at line 1073 of file reorderbuffer.c.

1074{
1075 ReorderBufferTXN *txn;
1076
1077 AssertTXNLsnOrder(rb);
1078
1079 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1080 return InvalidTransactionId;
1081
1082 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1083 &rb->txns_by_base_snapshot_lsn);
1084 return txn->base_snapshot->xmin;
1085}

References AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, dlist_head_element, dlist_is_empty(), fb(), InvalidTransactionId, and SnapshotData::xmin.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferImmediateInvalidation()

void ReorderBufferImmediateInvalidation ( ReorderBuffer * rb,
uint32  ninvalidations,
SharedInvalidationMessage * invalidations 
)

Definition at line 3249 of file reorderbuffer.c.

3251{
3252 bool use_subtxn = IsTransactionOrTransactionBlock();
3253 MemoryContext ccxt = CurrentMemoryContext;
3254 ResourceOwner cowner = CurrentResourceOwner;
3255 int i;
3256
3257 if (use_subtxn)
3258 BeginInternalSubTransaction("replay");
3259
3260 /*
3261 * Force invalidations to happen outside of a valid transaction - that way
3262 * entries will just be marked as invalid without accessing the catalog.
3263 * That's advantageous because we don't need to setup the full state
3264 * necessary for catalog access.
3265 */
3266 if (use_subtxn)
3267 AbortCurrentTransaction();
3268
3269 for (i = 0; i < ninvalidations; i++)
3270 LocalExecuteInvalidationMessage(&invalidations[i]);
3271
3272 if (use_subtxn)
3273 {
3274 RollbackAndReleaseCurrentSubTransaction();
3275 MemoryContextSwitchTo(ccxt);
3276 CurrentResourceOwner = cowner;
3277 }
3278}

References AbortCurrentTransaction(), BeginInternalSubTransaction(), CurrentMemoryContext, CurrentResourceOwner, fb(), i, IsTransactionOrTransactionBlock(), LocalExecuteInvalidationMessage(), MemoryContextSwitchTo(), and RollbackAndReleaseCurrentSubTransaction().

Referenced by ReorderBufferAbort(), ReorderBufferForget(), ReorderBufferInvalidate(), and xact_decode().

◆ ReorderBufferInvalidate()

void ReorderBufferInvalidate ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3218 of file reorderbuffer.c.

3219{
3220 ReorderBufferTXN *txn;
3221
3222 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3223 false);
3224
3225 /* unknown, nothing to do */
3226 if (txn == NULL)
3227 return;
3228
3229 /*
3230 * Process cache invalidation messages if there are any. Even if we're not
3231 * interested in the transaction's contents, it could have manipulated the
3232 * catalog and we need to update the caches according to that.
3233 */
3234 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3235 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3236 txn->invalidations);
3237 else
3238 Assert(txn->ninvalidations == 0);
3239}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodePrepare().

◆ ReorderBufferIterCompare()

static int ReorderBufferIterCompare ( Datum  a,
Datum  b,
void * arg 
)
static

Definition at line 1262 of file reorderbuffer.c.

1263{
1264 ReorderBufferIterTXNState *state = (ReorderBufferIterTXNState *) arg;
1265 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1266 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1267
1268 if (pos_a < pos_b)
1269 return 1;
1270 else if (pos_a == pos_b)
1271 return 0;
1272 return -1;
1273}

References a, arg, b, DatumGetInt32(), and fb().

Referenced by ReorderBufferIterTXNInit().

◆ ReorderBufferIterTXNFinish()

static void ReorderBufferIterTXNFinish ( ReorderBuffer * rb,
ReorderBufferIterTXNState * state 
)
static

Definition at line 1505 of file reorderbuffer.c.

1507{
1508 int32 off;
1509
1510 for (off = 0; off < state->nr_txns; off++)
1511 {
1512 if (state->entries[off].file.vfd != -1)
1513 FileClose(state->entries[off].file.vfd);
1514 }
1515
1516 /* free memory we might have "leaked" in the last *Next call */
1517 if (!dlist_is_empty(&state->old_change))
1518 {
1519 ReorderBufferChange *change;
1520
1521 change = dlist_container(ReorderBufferChange, node,
1522 dlist_pop_head_node(&state->old_change));
1523 ReorderBufferFreeChange(rb, change, true);
1524 Assert(dlist_is_empty(&state->old_change));
1525 }
1526
1527 binaryheap_free(state->heap);
1528 pfree(state);
1529}

References Assert, binaryheap_free(), dlist_container, dlist_is_empty(), dlist_pop_head_node(), fb(), FileClose(), pfree(), and ReorderBufferFreeChange().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNInit()

static void ReorderBufferIterTXNInit ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
ReorderBufferIterTXNState *volatile * iter_state 
)
static

Definition at line 1285 of file reorderbuffer.c.

1287{
1288 Size nr_txns = 0;
1289 ReorderBufferIterTXNState *state;
1290 dlist_iter cur_txn_i;
1291 int32 off;
1292
1293 *iter_state = NULL;
1294
1295 /* Check ordering of changes in the toplevel transaction. */
1296 AssertChangeLsnOrder(txn);
1297
1298 /*
1299 * Calculate the size of our heap: one element for every transaction that
1300 * contains changes. (Besides the transactions already in the reorder
1301 * buffer, we count the one we were directly passed.)
1302 */
1303 if (txn->nentries > 0)
1304 nr_txns++;
1305
1306 dlist_foreach(cur_txn_i, &txn->subtxns)
1307 {
1308 ReorderBufferTXN *cur_txn;
1309
1310 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1311
1312 /* Check ordering of changes in this subtransaction. */
1313 AssertChangeLsnOrder(cur_txn);
1314
1315 if (cur_txn->nentries > 0)
1316 nr_txns++;
1317 }
1318
1319 /* allocate iteration state */
1320 state = (ReorderBufferIterTXNState *)
1321 MemoryContextAllocZero(rb->context,
1322 sizeof(ReorderBufferIterTXNState) +
1323 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1324
1325 state->nr_txns = nr_txns;
1326 dlist_init(&state->old_change);
1327
1328 for (off = 0; off < state->nr_txns; off++)
1329 {
1330 state->entries[off].file.vfd = -1;
1331 state->entries[off].segno = 0;
1332 }
1333
1334 /* allocate heap */
1335 state->heap = binaryheap_allocate(state->nr_txns,
1336 ReorderBufferIterCompare,
1337 state);
1338
1339 /* Now that the state fields are initialized, it is safe to return it. */
1340 *iter_state = state;
1341
1342 /*
1343 * Now insert items into the binary heap, in an unordered fashion. (We
1344 * will run a heap assembly step at the end; this is more efficient.)
1345 */
1346
1347 off = 0;
1348
1349 /* add toplevel transaction if it contains changes */
1350 if (txn->nentries > 0)
1351 {
1352 ReorderBufferChange *cur_change;
1353
1354 if (rbtxn_is_serialized(txn))
1355 {
1356 /* serialize remaining changes */
1357 ReorderBufferSerializeTXN(rb, txn);
1358 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1359 &state->entries[off].segno);
1360 }
1361
1362 cur_change = dlist_head_element(ReorderBufferChange, node,
1363 &txn->changes);
1364
1365 state->entries[off].lsn = cur_change->lsn;
1366 state->entries[off].change = cur_change;
1367 state->entries[off].txn = txn;
1368
1369 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1370 }
1371
1372 /* add subtransactions if they contain changes */
1373 dlist_foreach(cur_txn_i, &txn->subtxns)
1374 {
1375 ReorderBufferTXN *cur_txn;
1376
1377 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1378
1379 if (cur_txn->nentries > 0)
1380 {
1381 ReorderBufferChange *cur_change;
1382
1383 if (rbtxn_is_serialized(cur_txn))
1384 {
1385 /* serialize remaining changes */
1386 ReorderBufferSerializeTXN(rb, cur_txn);
1387 ReorderBufferRestoreChanges(rb, cur_txn,
1388 &state->entries[off].file,
1389 &state->entries[off].segno);
1390 }
1391 cur_change = dlist_head_element(ReorderBufferChange, node,
1392 &cur_txn->changes);
1393
1394 state->entries[off].lsn = cur_change->lsn;
1395 state->entries[off].change = cur_change;
1396 state->entries[off].txn = cur_txn;
1397
1398 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1399 }
1400 }
1401
1402 /* assemble a valid binary heap */
1403 binaryheap_build(state->heap);
1404}

References AssertChangeLsnOrder(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), ReorderBufferTXN::changes, dlist_container, dlist_foreach, dlist_head_element, dlist_init(), fb(), Int32GetDatum(), MemoryContextAllocZero(), ReorderBufferTXN::nentries, rbtxn_is_serialized, ReorderBufferIterCompare(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), and ReorderBufferTXN::subtxns.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNNext()

static ReorderBufferChange * ReorderBufferIterTXNNext ( ReorderBuffer * rb,
ReorderBufferIterTXNState * state 
)
static

Definition at line 1413 of file reorderbuffer.c.

1414{
1415 ReorderBufferChange *change;
1416 ReorderBufferIterTXNEntry *entry;
1417 int32 off;
1418
1419 /* nothing there anymore */
1420 if (binaryheap_empty(state->heap))
1421 return NULL;
1422
1423 off = DatumGetInt32(binaryheap_first(state->heap));
1424 entry = &state->entries[off];
1425
1426 /* free memory we might have "leaked" in the previous *Next call */
1427 if (!dlist_is_empty(&state->old_change))
1428 {
1429 change = dlist_container(ReorderBufferChange, node,
1430 dlist_pop_head_node(&state->old_change));
1431 ReorderBufferFreeChange(rb, change, true);
1432 Assert(dlist_is_empty(&state->old_change));
1433 }
1434
1435 change = entry->change;
1436
1437 /*
1438 * update heap with information about which transaction has the next
1439 * relevant change in LSN order
1440 */
1441
1442 /* there are in-memory changes */
1443 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1444 {
1445 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1446 ReorderBufferChange *next_change =
1447 dlist_container(ReorderBufferChange, node, next);
1448
1449 /* txn stays the same */
1450 state->entries[off].lsn = next_change->lsn;
1451 state->entries[off].change = next_change;
1452
1453 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1454 return change;
1455 }
1456
1457 /* try to load changes from disk */
1458 if (entry->txn->nentries != entry->txn->nentries_mem)
1459 {
1460 /*
1461 * Ugly: restoring changes will reuse *Change records, thus delete the
1462 * current one from the per-tx list and only free in the next call.
1463 */
1464 dlist_delete(&change->node);
1465 dlist_push_tail(&state->old_change, &change->node);
1466
1467 /*
1468 * Update the total bytes processed by the txn for which we are
1469 * releasing the current set of changes and restoring the new set of
1470 * changes.
1471 */
1472 rb->totalBytes += entry->txn->size;
1473 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1474 &state->entries[off].segno))
1475 {
1476 /* successfully restored changes from disk */
1477 ReorderBufferChange *next_change =
1478 dlist_head_element(ReorderBufferChange, node,
1479 &entry->txn->changes);
1480
1481 elog(DEBUG2, "restored %u/%u changes from disk",
1482 (uint32) entry->txn->nentries_mem,
1483 (uint32) entry->txn->nentries);
1484
1485 Assert(entry->txn->nentries_mem);
1486 /* txn stays the same */
1487 state->entries[off].lsn = next_change->lsn;
1488 state->entries[off].change = next_change;
1489 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1490
1491 return change;
1492 }
1493 }
1494
1495 /* ok, no changes there anymore, remove */
1496 binaryheap_remove_first(state->heap);
1497
1498 return change;
1499}

References Assert, binaryheap_empty, binaryheap_first(), binaryheap_remove_first(), binaryheap_replace_first(), ReorderBufferIterTXNEntry::change, ReorderBufferTXN::changes, DatumGetInt32(), DEBUG2, dlist_container, dlist_delete(), dlist_has_next(), dlist_head_element, dlist_is_empty(), dlist_next_node(), dlist_pop_head_node(), dlist_push_tail(), elog, fb(), ReorderBufferIterTXNEntry::file, Int32GetDatum(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, next, ReorderBufferChange::node, ReorderBufferFreeChange(), ReorderBufferRestoreChanges(), ReorderBufferTXN::size, and ReorderBufferIterTXNEntry::txn.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferLargestStreamableTopTXN()

static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN ( ReorderBuffer * rb)
static

Definition at line 3842 of file reorderbuffer.c.

3843{
3844 dlist_iter iter;
3845 Size largest_size = 0;
3846 ReorderBufferTXN *largest = NULL;
3847
3848 /* Find the largest top-level transaction having a base snapshot. */
3849 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3850 {
3851 ReorderBufferTXN *txn;
3852
3853 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3854
3855 /* must not be a subtxn */
3856 Assert(!rbtxn_is_known_subxact(txn));
3857 /* base_snapshot must be set */
3858 Assert(txn->base_snapshot != NULL);
3859
3860 /* Don't consider these kinds of transactions for eviction. */
3861 if (rbtxn_has_partial_change(txn) ||
3862 !rbtxn_has_streamable_change(txn) ||
3863 rbtxn_is_aborted(txn))
3864 continue;
3865
3866 /* Find the largest of the eviction candidates. */
3867 if ((largest == NULL || txn->total_size > largest_size) &&
3868 (txn->total_size > 0))
3869 {
3870 largest = txn;
3871 largest_size = txn->total_size;
3872 }
3873 }
3874
3875 return largest;
3876}

References Assert, ReorderBufferTXN::base_snapshot, dlist_iter::cur, dlist_container, dlist_foreach, fb(), rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_aborted, rbtxn_is_known_subxact, and ReorderBufferTXN::total_size.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferLargestTXN()

static ReorderBufferTXN * ReorderBufferLargestTXN ( ReorderBuffer * rb)
static

Definition at line 3801 of file reorderbuffer.c.

3802{
3803 ReorderBufferTXN *largest;
3804
3805 /* Get the largest transaction from the max-heap */
3806 largest = pairingheap_container(ReorderBufferTXN, txn_node,
3807 pairingheap_first(rb->txn_heap));
3808
3809 Assert(largest);
3810 Assert(largest->size > 0);
3811 Assert(largest->size <= rb->size);
3812
3813 return largest;
3814}

References Assert, fb(), pairingheap_container, and pairingheap_first().

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferMaybeMarkTXNStreamed()

static void ReorderBufferMaybeMarkTXNStreamed ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 2139 of file reorderbuffer.c.

2140{
2141 /*
2142 * The top-level transaction, is marked as streamed always, even if it
2143 * does not contain any changes (that is, when all the changes are in
2144 * subtransactions).
2145 *
2146 * For subtransactions, we only mark them as streamed when there are
2147 * changes in them.
2148 *
2149 * We do it this way because of aborts - we don't want to send aborts for
2150 * XIDs the downstream is not aware of. And of course, it always knows
2151 * about the top-level xact (we send the XID in all messages), but we
2152 * never stream XIDs of empty subxacts.
2153 */
2154 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2155 txn->txn_flags |= RBTXN_IS_STREAMED;
2156}

References ReorderBufferTXN::nentries_mem, RBTXN_IS_STREAMED, rbtxn_is_toptxn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferPrepare()

void ReorderBufferPrepare ( ReorderBuffer * rb,
TransactionId  xid,
char * gid 
)

Definition at line 2956 of file reorderbuffer.c.

2958{
2959 ReorderBufferTXN *txn;
2960
2961 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2962 false);
2963
2964 /* unknown transaction, nothing to replay */
2965 if (txn == NULL)
2966 return;
2967
2968 /*
2969 * txn must have been marked as a prepared transaction and must have
2970 * neither been skipped nor sent a prepare. Also, the prepare info must
2971 * have been updated in it by now.
2972 */
2973 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) == RBTXN_IS_PREPARED);
2974 Assert(XLogRecPtrIsValid(txn->final_lsn));
2975
2976 txn->gid = pstrdup(gid);
2977
2978 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2979 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2980
2981 /*
2982 * Send a prepare if not already done so. This might occur if we have
2983 * detected a concurrent abort while replaying the non-streaming
2984 * transaction.
2985 */
2986 if (!rbtxn_sent_prepare(txn))
2987 {
2988 rb->prepare(rb, txn, txn->final_lsn);
2989 txn->txn_flags |= RBTXN_SENT_PREPARE;
2990 }
2991}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodePrepare().

◆ ReorderBufferProcessPartialChange()

static void ReorderBufferProcessPartialChange ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
ReorderBufferChange * change,
bool  toast_insert 
)
static

Definition at line 742 of file reorderbuffer.c.

745{
746 ReorderBufferTXN *toptxn;
747
748 /*
749 * The partial changes need to be processed only while streaming
750 * in-progress transactions.
751 */
752 if (!ReorderBufferCanStream(rb))
753 return;
754
755 /* Get the top transaction. */
756 toptxn = rbtxn_get_toptxn(txn);
757
758 /*
759 * Indicate a partial change for toast inserts. The change will be
760 * considered as complete once we get the insert or update on the main
761 * table and we are sure that the pending toast chunks are not required
762 * anymore.
763 *
764 * If we allow streaming when there are pending toast chunks then such
765 * chunks won't be released till the insert (multi_insert) is complete and
766 * we expect the txn to have streamed all changes after streaming. This
767 * restriction is mainly to ensure the correctness of streamed
768 * transactions and it doesn't seem worth uplifting such a restriction
769 * just to allow this case because anyway we will stream the transaction
770 * once such an insert is complete.
771 */
772 if (toast_insert)
773 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
774 else if (rbtxn_has_partial_change(toptxn) &&
775 IsInsertOrUpdate(change->action) &&
776 change->data.tp.clear_toast_afterwards)
777 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
778
779 /*
780 * Indicate a partial change for speculative inserts. The change will be
781 * considered as complete once we get the speculative confirm or abort
782 * token.
783 */
784 if (IsSpecInsert(change->action))
785 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
786 else if (rbtxn_has_partial_change(toptxn) &&
787 IsSpecConfirmOrAbort(change->action))
788 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
789
790 /*
791 * Stream the transaction if it is serialized before and the changes are
792 * now complete in the top-level transaction.
793 *
794 * The reason for doing the streaming of such a transaction as soon as we
795 * get the complete change for it is that previously it would have reached
796 * the memory threshold and wouldn't get streamed because of incomplete
797 * changes. Delaying such transactions would increase apply lag for them.
798 */
799 if (ReorderBufferCanStartStreaming(rb) &&
800 !(rbtxn_has_partial_change(toptxn)) &&
801 rbtxn_is_serialized(txn) &&
802 rbtxn_has_streamable_change(toptxn))
803 ReorderBufferStreamTXN(rb, toptxn);
804}

References ReorderBufferChange::action, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::data, fb(), IsInsertOrUpdate, IsSpecConfirmOrAbort, IsSpecInsert, rbtxn_get_toptxn, RBTXN_HAS_PARTIAL_CHANGE, rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_serialized, ReorderBufferCanStartStreaming(), ReorderBufferCanStream(), ReorderBufferStreamTXN(), ReorderBufferChange::tp, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferProcessTXN()

static void ReorderBufferProcessTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
XLogRecPtr  commit_lsn,
volatile Snapshot  snapshot_now,
volatile CommandId  command_id,
bool  streaming 
)
static

Definition at line 2204 of file reorderbuffer.c.

2209{
2210 bool using_subtxn;
2216 volatile bool stream_started = false;
2217 ReorderBufferTXN *volatile curtxn = NULL;
2218
2219 /* build data to be able to lookup the CommandIds of catalog tuples */
2221
2222 /* setup the initial snapshot */
2223 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2224
2225 /*
2226 * Decoding needs access to syscaches et al., which in turn use
2227 * heavyweight locks and such. Thus we need to have enough state around to
2228 * keep track of those. The easiest way is to simply use a transaction
2229 * internally. That also allows us to easily enforce that nothing writes
2230 * to the database by checking for xid assignments.
2231 *
2232 * When we're called via the SQL SRF there's already a transaction
2233 * started, so start an explicit subtransaction there.
2234 */
2236
2237 PG_TRY();
2238 {
2239 ReorderBufferChange *change;
2240 int changes_count = 0; /* used to accumulate the number of
2241 * changes */
2242
2243 if (using_subtxn)
2244 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2245 else
2247
2248 /*
2249 * We only need to send begin/begin-prepare for non-streamed
2250 * transactions.
2251 */
2252 if (!streaming)
2253 {
2254 if (rbtxn_is_prepared(txn))
2255 rb->begin_prepare(rb, txn);
2256 else
2257 rb->begin(rb, txn);
2258 }
2259
2261 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2262 {
2263 Relation relation = NULL;
2264 Oid reloid;
2265
2267
2268 /*
2269 * We can't call start stream callback before processing first
2270 * change.
2271 */
2273 {
2274 if (streaming)
2275 {
2276 txn->origin_id = change->origin_id;
2277 rb->stream_start(rb, txn, change->lsn);
2278 stream_started = true;
2279 }
2280 }
2281
2282 /*
2283 * Enforce correct ordering of changes, merged from multiple
2284 * subtransactions. The changes may have the same LSN due to
2285 * MULTI_INSERT xlog records.
2286 */
2288
2289 prev_lsn = change->lsn;
2290
2291 /*
2292 * Set the current xid to detect concurrent aborts. This is
2293 * required for the cases when we decode the changes before the
2294 * COMMIT record is processed.
2295 */
2296 if (streaming || rbtxn_is_prepared(change->txn))
2297 {
2298 curtxn = change->txn;
2300 }
2301
2302 switch (change->action)
2303 {
2305
2306 /*
2307 * Confirmation for speculative insertion arrived. Simply
2308 * use as a normal record. It'll be cleaned up at the end
2309 * of INSERT processing.
2310 */
2311 if (specinsert == NULL)
2312 elog(ERROR, "invalid ordering of speculative insertion changes");
2313 Assert(specinsert->data.tp.oldtuple == NULL);
2314 change = specinsert;
2316
2317 /* intentionally fall through */
2322 Assert(snapshot_now);
2323
2324 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2325 change->data.tp.rlocator.relNumber);
2326
2327 /*
2328 * Mapped catalog tuple without data, emitted while
2329 * catalog table was in the process of being rewritten. We
2330 * can fail to look up the relfilenumber, because the
2331 * relmapper has no "historic" view, in contrast to the
2332 * normal catalog during decoding. Thus repeated rewrites
2333 * can cause a lookup failure. That's OK because we do not
2334 * decode catalog changes anyway. Normally such tuples
2335 * would be skipped over below, but we can't identify
2336 * whether the table should be logically logged without
2337 * mapping the relfilenumber to the oid.
2338 */
2339 if (reloid == InvalidOid &&
2340 change->data.tp.newtuple == NULL &&
2341 change->data.tp.oldtuple == NULL)
2342 goto change_done;
2343 else if (reloid == InvalidOid)
2344 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2345 relpathperm(change->data.tp.rlocator,
2346 MAIN_FORKNUM).str);
2347
2348 relation = RelationIdGetRelation(reloid);
2349
2350 if (!RelationIsValid(relation))
2351 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2352 reloid,
2353 relpathperm(change->data.tp.rlocator,
2354 MAIN_FORKNUM).str);
2355
2356 if (!RelationIsLogicallyLogged(relation))
2357 goto change_done;
2358
2359 /*
2360 * Ignore temporary heaps created during DDL unless the
2361 * plugin has asked for them.
2362 */
2363 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2364 goto change_done;
2365
2366 /*
2367 * For now ignore sequence changes entirely. Most of the
2368 * time they don't log changes using records we
2369 * understand, so it doesn't make sense to handle the few
2370 * cases we do.
2371 */
2372 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2373 goto change_done;
2374
2375 /* user-triggered change */
2376 if (!IsToastRelation(relation))
2377 {
2378 ReorderBufferToastReplace(rb, txn, relation, change);
2379 ReorderBufferApplyChange(rb, txn, relation, change,
2380 streaming);
2381
2382 /*
2383 * Only clear reassembled toast chunks if we're sure
2384 * they're not required anymore. The creator of the
2385 * tuple tells us.
2386 */
2387 if (change->data.tp.clear_toast_afterwards)
2389 }
2390 /* we're not interested in toast deletions */
2391 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2392 {
2393 /*
2394 * Need to reassemble the full toasted Datum in
2395 * memory, to ensure the chunks don't get reused till
2396 * we're done remove it from the list of this
2397 * transaction's changes. Otherwise it will get
2398 * freed/reused while restoring spooled data from
2399 * disk.
2400 */
2401 Assert(change->data.tp.newtuple != NULL);
2402
2403 dlist_delete(&change->node);
2404 ReorderBufferToastAppendChunk(rb, txn, relation,
2405 change);
2406 }
2407
2409
2410 /*
2411 * If speculative insertion was confirmed, the record
2412 * isn't needed anymore.
2413 */
2414 if (specinsert != NULL)
2415 {
2417 specinsert = NULL;
2418 }
2419
2420 if (RelationIsValid(relation))
2421 {
2422 RelationClose(relation);
2423 relation = NULL;
2424 }
2425 break;
2426
2428
2429 /*
2430 * Speculative insertions are dealt with by delaying the
2431 * processing of the insert until the confirmation record
2432 * arrives. For that we simply unlink the record from the
2433 * chain, so it does not get freed/reused while restoring
2434 * spooled data from disk.
2435 *
2436 * This is safe in the face of concurrent catalog changes
2437 * because the relevant relation can't be changed between
2438 * speculative insertion and confirmation due to
2439 * CheckTableNotInUse() and locking.
2440 */
2441
2442 /* Previous speculative insertion must be aborted */
2444
2445 /* and memorize the pending insertion */
2446 dlist_delete(&change->node);
2447 specinsert = change;
2448 break;
2449
2451
2452 /*
2453 * Abort for speculative insertion arrived. So cleanup the
2454 * specinsert tuple and toast hash.
2455 *
2456 * Note that we get the spec abort change for each toast
2457 * entry but we need to perform the cleanup only the first
2458 * time we get it for the main table.
2459 */
2460 if (specinsert != NULL)
2461 {
2462 /*
2463 * We must clean the toast hash before processing a
2464 * completely new tuple to avoid confusion about the
2465 * previous tuple's toast chunks.
2466 */
2469
2470 /* We don't need this record anymore. */
2472 specinsert = NULL;
2473 }
2474 break;
2475
2477 {
2478 int i;
2479 int nrelids = change->data.truncate.nrelids;
2480 int nrelations = 0;
2481 Relation *relations;
2482
2483 relations = palloc0_array(Relation, nrelids);
2484 for (i = 0; i < nrelids; i++)
2485 {
2486 Oid relid = change->data.truncate.relids[i];
2487 Relation rel;
2488
2489 rel = RelationIdGetRelation(relid);
2490
2491 if (!RelationIsValid(rel))
2492 elog(ERROR, "could not open relation with OID %u", relid);
2493
2494 if (!RelationIsLogicallyLogged(rel))
2495 continue;
2496
2497 relations[nrelations++] = rel;
2498 }
2499
2500 /* Apply the truncate. */
2502 relations, change,
2503 streaming);
2504
2505 for (i = 0; i < nrelations; i++)
2506 RelationClose(relations[i]);
2507
2508 break;
2509 }
2510
2512 ReorderBufferApplyMessage(rb, txn, change, streaming);
2513 break;
2514
2516 /* Execute the invalidation messages locally */
2518 change->data.inval.invalidations);
2519 break;
2520
2522 /* get rid of the old */
2524
2525 if (snapshot_now->copied)
2526 {
2527 ReorderBufferFreeSnap(rb, snapshot_now);
2528 snapshot_now =
2530 txn, command_id);
2531 }
2532
2533 /*
2534 * Restored from disk, need to be careful not to double
2535 * free. We could introduce refcounting for that, but for
2536 * now this seems infrequent enough not to care.
2537 */
2538 else if (change->data.snapshot->copied)
2539 {
2540 snapshot_now =
2542 txn, command_id);
2543 }
2544 else
2545 {
2546 snapshot_now = change->data.snapshot;
2547 }
2548
2549 /* and continue with the new one */
2550 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2551 break;
2552
2555
2556 if (command_id < change->data.command_id)
2557 {
2558 command_id = change->data.command_id;
2559
2560 if (!snapshot_now->copied)
2561 {
2562 /* we don't use the global one anymore */
2563 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2564 txn, command_id);
2565 }
2566
2567 snapshot_now->curcid = command_id;
2568
2570 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2571 }
2572
2573 break;
2574
2576 elog(ERROR, "tuplecid value in changequeue");
2577 break;
2578 }
2579
2580 /*
2581 * It is possible that the data is not sent to downstream for a
2582 * long time either because the output plugin filtered it or there
2583 * is a DDL that generates a lot of data that is not processed by
2584 * the plugin. So, in such cases, the downstream can timeout. To
2585 * avoid that we try to send a keepalive message if required.
2586 * Trying to send a keepalive message after every change has some
2587 * overhead, but testing showed there is no noticeable overhead if
2588 * we do it after every ~100 changes.
2589 */
2590#define CHANGES_THRESHOLD 100
2591
2593 {
2594 rb->update_progress_txn(rb, txn, prev_lsn);
2595 changes_count = 0;
2596 }
2597 }
2598
2599 /* speculative insertion record must be freed by now */
2601
2602 /* clean up the iterator */
2604 iterstate = NULL;
2605
2606 /*
2607 * Update total transaction count and total bytes processed by the
2608 * transaction and its subtransactions. Ensure to not count the
2609 * streamed transaction multiple times.
2610 *
2611 * Note that the statistics computation has to be done after
2612 * ReorderBufferIterTXNFinish as it releases the serialized change
2613 * which we have already accounted in ReorderBufferIterTXNNext.
2614 */
2615 if (!rbtxn_is_streamed(txn))
2616 rb->totalTxns++;
2617
2618 rb->totalBytes += txn->total_size;
2619
2620 /*
2621 * Done with current changes, send the last message for this set of
2622 * changes depending upon streaming mode.
2623 */
2624 if (streaming)
2625 {
2626 if (stream_started)
2627 {
2628 rb->stream_stop(rb, txn, prev_lsn);
2629 stream_started = false;
2630 }
2631 }
2632 else
2633 {
2634 /*
2635 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2636 * regular ones).
2637 */
2638 if (rbtxn_is_prepared(txn))
2639 {
2641 rb->prepare(rb, txn, commit_lsn);
2643 }
2644 else
2645 rb->commit(rb, txn, commit_lsn);
2646 }
2647
2648 /* this is just a sanity check against bad output plugin behaviour */
2650 elog(ERROR, "output plugin used XID %u",
2652
2653 /*
2654 * Remember the command ID and snapshot for the next set of changes in
2655 * streaming mode.
2656 */
2657 if (streaming)
2658 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2659 else if (snapshot_now->copied)
2660 ReorderBufferFreeSnap(rb, snapshot_now);
2661
2662 /* cleanup */
2664
2665 /*
2666 * Aborting the current (sub-)transaction as a whole has the right
2667 * semantics. We want all locks acquired in here to be released, not
2668 * reassigned to the parent and we do not want any database access
2669 * to have persistent effects.
2670 */
2672
2673 /* make sure there's no cache pollution */
2675 {
2678 }
2679 else
2680 {
2684 }
2685
2686 if (using_subtxn)
2687 {
2690 CurrentResourceOwner = cowner;
2691 }
2692
2693 /*
2694 * We are here due to one of the four reasons: 1. Decoding an
2695 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2696 * prepared txn that was (partially) streamed. 4. Decoding a committed
2697 * txn.
2698 *
2699 * For 1, we allow truncation of txn data by removing the changes
2700 * already streamed but still keeping other things like invalidations,
2701 * snapshot, and tuplecids. For 2 and 3, we indicate
2702 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2703 * data as the entire transaction has been decoded except for commit.
2704 * For 4, as the entire txn has been decoded, we can fully clean up
2705 * the TXN reorder buffer.
2706 */
2707 if (streaming || rbtxn_is_prepared(txn))
2708 {
2709 if (streaming)
2711
2713 /* Reset the CheckXidAlive */
2715 }
2716 else
2718 }
2719 PG_CATCH();
2720 {
2723
2724 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2725 if (iterstate)
2727
2729
2730 /*
2731 * Force cache invalidation to happen outside of a valid transaction
2732 * to prevent catalog access as we just caught an error.
2733 */
2735
2736 /* make sure there's no cache pollution */
2738 {
2741 }
2742 else
2743 {
2747 }
2748
2749 if (using_subtxn)
2750 {
2753 CurrentResourceOwner = cowner;
2754 }
2755
2756 /* Free the specinsert change before freeing the ReorderBufferTXN */
2757 if (specinsert != NULL)
2758 {
2760 specinsert = NULL;
2761 }
2762
2763 /*
2764 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2765 * abort of the (sub)transaction we are streaming or preparing. We
2766 * need to do the cleanup and return gracefully on this error, see
2767 * SetupCheckXidLive.
2768 *
2769 * This error code can be thrown by one of the callbacks we call
2770 * during decoding so we need to ensure that we return gracefully only
2771 * when we are sending the data in streaming mode and the streaming is
2772 * not finished yet or when we are sending the data out on a PREPARE
2773 * during a two-phase commit.
2774 */
2775 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2777 {
2778 /* curtxn must be set for streaming or prepared transactions */
2779 Assert(curtxn);
2780
2781 /* Cleanup the temporary error state. */
2784 errdata = NULL;
2785
2786 /* Remember the transaction is aborted. */
2788 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2789
2790 /* Mark the transaction as streamed if appropriate */
2791 if (stream_started)
2793
2794 /* Reset the TXN so that it is allowed to stream remaining data. */
2795 ReorderBufferResetTXN(rb, txn, snapshot_now,
2796 command_id, prev_lsn);
2797 }
2798 else
2799 {
2802 PG_RE_THROW();
2803 }
2804 }
2805 PG_END_TRY();
2806}

References AbortCurrentTransaction(), ReorderBufferChange::action, Assert, BeginInternalSubTransaction(), CHANGES_THRESHOLD, CHECK_FOR_INTERRUPTS, CheckXidAlive, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::command_id, SnapshotData::copied, CopyErrorData(), SnapshotData::curcid, CurrentMemoryContext, CurrentResourceOwner, ReorderBufferChange::data, data, dlist_delete(), elog, ERROR, fb(), FlushErrorState(), FreeErrorData(), GetCurrentTransactionId(), GetCurrentTransactionIdIfAny(), i, ReorderBufferChange::inval, InvalidateSystemCaches(), ReorderBufferChange::invalidations, ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidCommandId, InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsToastRelation(), IsTransactionOrTransactionBlock(), ReorderBufferChange::lsn, MAIN_FORKNUM, MemoryContextSwitchTo(), ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::ninvalidations_distributed, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::origin_id, ReorderBufferTXN::origin_id, palloc0_array, PG_CATCH, PG_END_TRY, pg_fallthrough, PG_RE_THROW, PG_TRY, rbtxn_distr_inval_overflowed, RBTXN_IS_ABORTED, rbtxn_is_committed, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, RelationData::rd_rel, RelationClose(), RelationIdGetRelation(), RelationIsLogicallyLogged, RelationIsValid, RelidByRelfilenumber(), ReorderBufferChange::relids, RelFileLocator::relNumber, relpathperm, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, 
REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferApplyChange(), ReorderBufferApplyMessage(), ReorderBufferApplyTruncate(), ReorderBufferBuildTupleCidHash(), ReorderBufferCleanupTXN(), ReorderBufferCopySnap(), ReorderBufferExecuteInvalidations(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNInit(), ReorderBufferIterTXNNext(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferResetTXN(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastAppendChunk(), ReorderBufferToastReplace(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferChange::rlocator, RollbackAndReleaseCurrentSubTransaction(), SetupCheckXidLive(), SetupHistoricSnapshot(), ReorderBufferChange::snapshot, RelFileLocator::spcOid, StartTransactionCommand(), TeardownHistoricSnapshot(), ReorderBufferTXN::total_size, ReorderBufferChange::tp, ReorderBufferChange::truncate, ReorderBufferTXN::tuplecid_hash, ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by ReorderBufferReplay(), and ReorderBufferStreamTXN().

◆ ReorderBufferProcessXid()

void ReorderBufferProcessXid ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3291 of file reorderbuffer.c.

/*
 * ReorderBufferProcessXid: register that a record carrying `xid` was seen
 * at `lsn`.
 *
 * For any valid xid this calls ReorderBufferTXNByXid() and discards the
 * returned pointer, so the call is made only for its effect on the reorder
 * buffer's transaction bookkeeping.  An invalid xid is silently ignored.
 *
 * NOTE(review): the two `true` arguments presumably request that the
 * transaction entry be created when it does not exist yet -- confirm
 * against the definition of ReorderBufferTXNByXid().
 */
3292{
3293 /* many records won't have an xid assigned, so centralize the check here */
3294 if (xid != InvalidTransactionId)
3295 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3296}

References fb(), InvalidTransactionId, and ReorderBufferTXNByXid().

Referenced by heap2_decode(), heap_decode(), LogicalDecodingProcessRecord(), logicalmsg_decode(), standby_decode(), xact_decode(), xlog2_decode(), and xlog_decode().

◆ ReorderBufferQueueChange()

void ReorderBufferQueueChange ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn,
ReorderBufferChange * change,
bool  toast_insert 
)

Definition at line 811 of file reorderbuffer.c.

813{
814 ReorderBufferTXN *txn;
815
816 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
817
818 /*
819 * If we have detected that the transaction is aborted while streaming the
820 * previous changes or by checking its CLOG, there is no point in
821 * collecting further changes for it.
822 */
823 if (rbtxn_is_aborted(txn))
824 {
825 /*
826 * We don't need to update memory accounting for this change as we
827 * have not added it to the queue yet.
828 */
829 ReorderBufferFreeChange(rb, change, false);
830 return;
831 }
832
833 /*
834 * The changes that are sent downstream are considered streamable. We
835 * remember such transactions so that only those will later be considered
836 * for streaming.
837 */
838 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
844 {
845 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
846
848 }
849
850 change->lsn = lsn;
851 change->txn = txn;
852
854 dlist_push_tail(&txn->changes, &change->node);
855 txn->nentries++;
856 txn->nentries_mem++;
857
858 /* update memory accounting information */
861
862 /* process partial change */
864
865 /* check the memory limits and evict something if needed */
867}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_push_tail(), fb(), ReorderBufferChange::lsn, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_get_toptxn, RBTXN_HAS_STREAMABLE_CHANGE, rbtxn_is_aborted, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCheckMemoryLimit(), ReorderBufferFreeChange(), ReorderBufferProcessPartialChange(), ReorderBufferTXNByXid(), ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeSpecConfirm(), DecodeTruncate(), DecodeUpdate(), ReorderBufferAddNewCommandId(), ReorderBufferAddSnapshot(), ReorderBufferQueueInvalidations(), and ReorderBufferQueueMessage().

◆ ReorderBufferQueueInvalidations()

◆ ReorderBufferQueueMessage()

void ReorderBufferQueueMessage ( ReorderBuffer * rb,
TransactionId  xid,
Snapshot  snap,
XLogRecPtr  lsn,
bool  transactional,
const char * prefix,
Size  message_size,
const char * message
)

Definition at line 874 of file reorderbuffer.c.

878{
879 if (transactional)
880 {
881 MemoryContext oldcontext;
882 ReorderBufferChange *change;
883
885
886 /*
887 * We don't expect snapshots for transactional changes - we'll use the
888 * snapshot derived later during apply (unless the change gets
889 * skipped).
890 */
891 Assert(!snap);
892
893 oldcontext = MemoryContextSwitchTo(rb->context);
894
897 change->data.msg.prefix = pstrdup(prefix);
898 change->data.msg.message_size = message_size;
899 change->data.msg.message = palloc(message_size);
900 memcpy(change->data.msg.message, message, message_size);
901
902 ReorderBufferQueueChange(rb, xid, lsn, change, false);
903
904 MemoryContextSwitchTo(oldcontext);
905 }
906 else
907 {
908 ReorderBufferTXN *txn = NULL;
909 volatile Snapshot snapshot_now = snap;
910
911 /* Non-transactional changes require a valid snapshot. */
912 Assert(snapshot_now);
913
914 if (xid != InvalidTransactionId)
915 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
916
917 /* setup snapshot to allow catalog access */
918 SetupHistoricSnapshot(snapshot_now, NULL);
919 PG_TRY();
920 {
921 rb->message(rb, txn, lsn, false, prefix, message_size, message);
922
924 }
925 PG_CATCH();
926 {
928 PG_RE_THROW();
929 }
930 PG_END_TRY();
931 }
932}

References ReorderBufferChange::action, Assert, ReorderBufferChange::data, fb(), InvalidTransactionId, memcpy(), MemoryContextSwitchTo(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, palloc(), PG_CATCH, PG_END_TRY, PG_RE_THROW, PG_TRY, ReorderBufferChange::prefix, pstrdup(), REORDER_BUFFER_CHANGE_MESSAGE, ReorderBufferAllocChange(), ReorderBufferQueueChange(), ReorderBufferTXNByXid(), SetupHistoricSnapshot(), and TeardownHistoricSnapshot().

Referenced by logicalmsg_decode().

◆ ReorderBufferRememberPrepareInfo()

bool ReorderBufferRememberPrepareInfo ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  prepare_lsn,
XLogRecPtr  end_lsn,
TimestampTz  prepare_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2903 of file reorderbuffer.c.

2907{
2908 ReorderBufferTXN *txn;
2909
2910 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2911
2912 /* unknown transaction, nothing to do */
2913 if (txn == NULL)
2914 return false;
2915
2916 /*
2917 * Remember the prepare information to be later used by commit prepared in
2918 * case we skip doing prepare.
2919 */
2920 txn->final_lsn = prepare_lsn;
2921 txn->end_lsn = end_lsn;
2922 txn->prepare_time = prepare_time;
2923 txn->origin_id = origin_id;
2924 txn->origin_lsn = origin_lsn;
2925
2926 /* Mark this transaction as a prepared transaction */
2929
2930 return true;
2931}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferReplay()

static void ReorderBufferReplay ( ReorderBufferTXN * txn,
ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)
static

Definition at line 2819 of file reorderbuffer.c.

2824{
2825 Snapshot snapshot_now;
2826 CommandId command_id = FirstCommandId;
2827
2828 txn->final_lsn = commit_lsn;
2829 txn->end_lsn = end_lsn;
2830 txn->commit_time = commit_time;
2831 txn->origin_id = origin_id;
2832 txn->origin_lsn = origin_lsn;
2833
2834 /*
2835 * If the transaction was (partially) streamed, we need to commit it in a
2836 * 'streamed' way. That is, we first stream the remaining part of the
2837 * transaction, and then invoke stream_commit message.
2838 *
2839 * Called after everything (origin ID, LSN, ...) is stored in the
2840 * transaction to avoid passing that information directly.
2841 */
2842 if (rbtxn_is_streamed(txn))
2843 {
2845 return;
2846 }
2847
2848 /*
2849 * If this transaction has no snapshot, it didn't make any changes to the
2850 * database, so there's nothing to decode. Note that
2851 * ReorderBufferCommitChild will have transferred any snapshots from
2852 * subtransactions if there were any.
2853 */
2854 if (txn->base_snapshot == NULL)
2855 {
2856 Assert(txn->ninvalidations == 0);
2857
2858 /*
2859 * Removing this txn before a commit might result in the computation
2860 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2861 */
2862 if (!rbtxn_is_prepared(txn))
2864 return;
2865 }
2866
2867 snapshot_now = txn->base_snapshot;
2868
2869 /* Process and send the changes to output plugin. */
2870 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2871 command_id, false);
2872}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, FirstCommandId, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferProcessTXN(), and ReorderBufferStreamCommit().

Referenced by ReorderBufferCommit(), ReorderBufferFinishPrepared(), and ReorderBufferPrepare().

◆ ReorderBufferResetTXN()

static void ReorderBufferResetTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
Snapshot  snapshot_now,
CommandId  command_id,
XLogRecPtr  last_lsn 
)
static

Definition at line 2166 of file reorderbuffer.c.

2170{
2171 /* Discard the changes that we just streamed */
2173
2174 /* Free all resources allocated for toast reconstruction */
2176
2177 /*
2178 * For the streaming case, stop the stream and remember the command ID and
2179 * snapshot for the streaming run.
2180 */
2181 if (rbtxn_is_streamed(txn))
2182 {
2183 rb->stream_stop(rb, txn, last_lsn);
2184 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2185 }
2186
2187 /* All changes must be deallocated */
2188 Assert(txn->size == 0);
2189}

References Assert, fb(), rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferSaveTXNSnapshot(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::size.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferRestoreChange()

static void ReorderBufferRestoreChange ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
char * data
)
static

Definition at line 4683 of file reorderbuffer.c.

4685{
4687 ReorderBufferChange *change;
4688
4689 ondisk = (ReorderBufferDiskChange *) data;
4690
4691 change = ReorderBufferAllocChange(rb);
4692
4693 /* copy static part */
4694 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4695
4696 data += sizeof(ReorderBufferDiskChange);
4697
4698 /* restore individual stuff */
4699 switch (change->action)
4700 {
4701 /* fall through these, they're all similar enough */
4706 if (change->data.tp.oldtuple)
4707 {
4708 uint32 tuplelen = ((HeapTuple) data)->t_len;
4709
4710 change->data.tp.oldtuple =
4712
4713 /* restore ->tuple */
4714 memcpy(change->data.tp.oldtuple, data,
4715 sizeof(HeapTupleData));
4716 data += sizeof(HeapTupleData);
4717
4718 /* reset t_data pointer into the new tuplebuf */
4719 change->data.tp.oldtuple->t_data =
4720 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4721
4722 /* restore tuple data itself */
4724 data += tuplelen;
4725 }
4726
4727 if (change->data.tp.newtuple)
4728 {
4729 /* here, data might not be suitably aligned! */
4731
4733 sizeof(uint32));
4734
4735 change->data.tp.newtuple =
4737
4738 /* restore ->tuple */
4739 memcpy(change->data.tp.newtuple, data,
4740 sizeof(HeapTupleData));
4741 data += sizeof(HeapTupleData);
4742
4743 /* reset t_data pointer into the new tuplebuf */
4744 change->data.tp.newtuple->t_data =
4745 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4746
4747 /* restore tuple data itself */
4749 data += tuplelen;
4750 }
4751
4752 break;
4754 {
4755 Size prefix_size;
4756
4757 /* read prefix */
4758 memcpy(&prefix_size, data, sizeof(Size));
4759 data += sizeof(Size);
4760 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4761 prefix_size);
4762 memcpy(change->data.msg.prefix, data, prefix_size);
4763 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4764 data += prefix_size;
4765
4766 /* read the message */
4767 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4768 data += sizeof(Size);
4769 change->data.msg.message = MemoryContextAlloc(rb->context,
4770 change->data.msg.message_size);
4771 memcpy(change->data.msg.message, data,
4772 change->data.msg.message_size);
4773 data += change->data.msg.message_size;
4774
4775 break;
4776 }
4778 {
4780 change->data.inval.ninvalidations;
4781
4782 change->data.inval.invalidations =
4783 MemoryContextAlloc(rb->context, inval_size);
4784
4785 /* read the message */
4787
4788 break;
4789 }
4791 {
4794 Size size;
4795
4796 oldsnap = (Snapshot) data;
4797
4798 size = sizeof(SnapshotData) +
4799 sizeof(TransactionId) * oldsnap->xcnt +
4800 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4801
4802 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4803
4804 newsnap = change->data.snapshot;
4805
4806 memcpy(newsnap, data, size);
4807 newsnap->xip = (TransactionId *)
4808 (((char *) newsnap) + sizeof(SnapshotData));
4809 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4810 newsnap->copied = true;
4811 break;
4812 }
4813 /* the base struct contains all the data, easy peasy */
4815 {
4816 Oid *relids;
4817
4818 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4819 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4820 change->data.truncate.relids = relids;
4821
4822 break;
4823 }
4828 break;
4829 }
4830
4831 dlist_push_tail(&txn->changes, &change->node);
4832 txn->nentries_mem++;
4833
4834 /*
4835 * Update memory accounting for the restored change. We need to do this
4836 * although we don't check the memory limit when restoring the changes in
4837 * this branch (we only do that when initially queueing the changes after
4838 * decoding), because we will release the changes later, and that will
4839 * update the accounting too (subtracting the size from the counters). And
4840 * we don't want to underflow there.
4841 */
4843 ReorderBufferChangeSize(change));
4844}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, ReorderBufferTXN::changes, ReorderBufferChange::data, data, dlist_push_tail(), fb(), HEAPTUPLESIZE, ReorderBufferChange::inval, ReorderBufferChange::invalidations, memcpy(), MemoryContextAlloc(), MemoryContextAllocZero(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferTXN::nentries_mem, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferAllocChange(), ReorderBufferAllocRelids(), ReorderBufferAllocTupleBuf(), ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SizeofHeapTupleHeader, ReorderBufferChange::snapshot, HeapTupleData::t_data, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferRestoreChanges().

◆ ReorderBufferRestoreChanges()

static Size ReorderBufferRestoreChanges ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
TXNEntryFile * file,
XLogSegNo * segno
)
static

Definition at line 4540 of file reorderbuffer.c.

4542{
4543 Size restored = 0;
4546 File *fd = &file->vfd;
4547
4550
4551 /* free current entries, so we have memory for more */
4553 {
4556
4557 dlist_delete(&cleanup->node);
4559 }
4560 txn->nentries_mem = 0;
4562
4564
4565 while (restored < max_changes_in_memory && *segno <= last_segno)
4566 {
4567 int readBytes;
4569
4571
4572 if (*fd == -1)
4573 {
4574 char path[MAXPGPATH];
4575
4576 /* first time in */
4577 if (*segno == 0)
4578 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4579
4580 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4581
4582 /*
4583 * No need to care about TLIs here, only used during a single run,
4584 * so each LSN only maps to a specific WAL record.
4585 */
4587 *segno);
4588
4590
4591 /* No harm in resetting the offset even in case of failure */
4592 file->curOffset = 0;
4593
4594 if (*fd < 0 && errno == ENOENT)
4595 {
4596 *fd = -1;
4597 (*segno)++;
4598 continue;
4599 }
4600 else if (*fd < 0)
4601 ereport(ERROR,
4603 errmsg("could not open file \"%s\": %m",
4604 path)));
4605 }
4606
4607 /*
4608 * Read the statically sized part of a change which has information
4609 * about the total size. If we couldn't read a record, we're at the
4610 * end of this file.
4611 */
4613 readBytes = FileRead(file->vfd, rb->outbuf,
4616
4617 /* eof */
4618 if (readBytes == 0)
4619 {
4620 FileClose(*fd);
4621 *fd = -1;
4622 (*segno)++;
4623 continue;
4624 }
4625 else if (readBytes < 0)
4626 ereport(ERROR,
4628 errmsg("could not read from reorderbuffer spill file: %m")));
4629 else if (readBytes != sizeof(ReorderBufferDiskChange))
4630 ereport(ERROR,
4632 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4633 readBytes,
4634 (uint32) sizeof(ReorderBufferDiskChange))));
4635
4636 file->curOffset += readBytes;
4637
4638 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4639
4641 sizeof(ReorderBufferDiskChange) + ondisk->size);
4642 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4643
4644 readBytes = FileRead(file->vfd,
4645 rb->outbuf + sizeof(ReorderBufferDiskChange),
4646 ondisk->size - sizeof(ReorderBufferDiskChange),
4647 file->curOffset,
4649
4650 if (readBytes < 0)
4651 ereport(ERROR,
4653 errmsg("could not read from reorderbuffer spill file: %m")));
4654 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4655 ereport(ERROR,
4657 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4658 readBytes,
4659 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4660
4661 file->curOffset += readBytes;
4662
4663 /*
4664 * ok, read a full change from disk, now restore it into proper
4665 * in-memory format
4666 */
4667 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4668 restored++;
4669 }
4670
4671 return restored;
4672}

References Assert, ReorderBufferTXN::changes, CHECK_FOR_INTERRUPTS, cleanup(), TXNEntryFile::curOffset, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_is_empty(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), FileClose(), FileRead(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, max_changes_in_memory, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, PathNameOpenFile(), PG_BINARY, ReorderBufferFreeChange(), ReorderBufferRestoreChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, TXNEntryFile::vfd, wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit(), and ReorderBufferIterTXNNext().

◆ ReorderBufferRestoreCleanup()

static void ReorderBufferRestoreCleanup ( ReorderBuffer * rb,
ReorderBufferTXN * txn
)
static

Definition at line 4850 of file reorderbuffer.c.

4851{
4852 XLogSegNo first;
4853 XLogSegNo cur;
4854 XLogSegNo last;
4855
4858
4861
4862 /* iterate over all possible filenames, and delete them */
4863 for (cur = first; cur <= last; cur++)
4864 {
4865 char path[MAXPGPATH];
4866
4868 if (unlink(path) != 0 && errno != ENOENT)
4869 ereport(ERROR,
4871 errmsg("could not remove file \"%s\": %m", path)));
4872 }
4873}

References Assert, cur, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferSerializedPath(), wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferCleanupTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferSaveTXNSnapshot()

static void ReorderBufferSaveTXNSnapshot ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
Snapshot  snapshot_now,
CommandId  command_id 
)
inlinestatic

Definition at line 2121 of file reorderbuffer.c.

/*
 * ReorderBufferSaveTXNSnapshot: remember the command ID and snapshot on the
 * transaction so that they can be picked up again later.
 *
 * txn->command_id is always overwritten with `command_id`.
 * txn->snapshot_now is set to `snapshot_now` itself when that snapshot is
 * already flagged as copied; otherwise it is set to whatever
 * ReorderBufferCopySnap() returns for it.
 */
2123{
2124 txn->command_id = command_id;
2125
2126 /* A snapshot already marked as copied is stored as-is, without copying again. */
2127 if (snapshot_now->copied)
2128 txn->snapshot_now = snapshot_now;
2129 else
2130 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2131 txn, command_id);
2132}

References ReorderBufferTXN::command_id, SnapshotData::copied, fb(), ReorderBufferCopySnap(), and ReorderBufferTXN::snapshot_now.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferSerializeChange()

static void ReorderBufferSerializeChange ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
int  fd,
ReorderBufferChange * change
)
static

Definition at line 4088 of file reorderbuffer.c.

4090{
4093
4095
4096 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4097 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4098
4099 switch (change->action)
4100 {
4101 /* fall through these, they're all similar enough */
4106 {
4107 char *data;
4109 newtup;
4110 Size oldlen = 0;
4111 Size newlen = 0;
4112
4113 oldtup = change->data.tp.oldtuple;
4114 newtup = change->data.tp.newtuple;
4115
4116 if (oldtup)
4117 {
4118 sz += sizeof(HeapTupleData);
4119 oldlen = oldtup->t_len;
4120 sz += oldlen;
4121 }
4122
4123 if (newtup)
4124 {
4125 sz += sizeof(HeapTupleData);
4126 newlen = newtup->t_len;
4127 sz += newlen;
4128 }
4129
4130 /* make sure we have enough space */
4132
4133 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4134 /* might have been reallocated above */
4135 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4136
4137 if (oldlen)
4138 {
4139 memcpy(data, oldtup, sizeof(HeapTupleData));
4140 data += sizeof(HeapTupleData);
4141
4142 memcpy(data, oldtup->t_data, oldlen);
4143 data += oldlen;
4144 }
4145
4146 if (newlen)
4147 {
4148 memcpy(data, newtup, sizeof(HeapTupleData));
4149 data += sizeof(HeapTupleData);
4150
4151 memcpy(data, newtup->t_data, newlen);
4152 data += newlen;
4153 }
4154 break;
4155 }
4157 {
4158 char *data;
4159 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4160
4161 sz += prefix_size + change->data.msg.message_size +
4162 sizeof(Size) + sizeof(Size);
4164
4165 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4166
4167 /* might have been reallocated above */
4168 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4169
4170 /* write the prefix including the size */
4171 memcpy(data, &prefix_size, sizeof(Size));
4172 data += sizeof(Size);
4173 memcpy(data, change->data.msg.prefix,
4174 prefix_size);
4175 data += prefix_size;
4176
4177 /* write the message including the size */
4178 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4179 data += sizeof(Size);
4180 memcpy(data, change->data.msg.message,
4181 change->data.msg.message_size);
4182 data += change->data.msg.message_size;
4183
4184 break;
4185 }
4187 {
4188 char *data;
4190 change->data.inval.ninvalidations;
4191
4192 sz += inval_size;
4193
4195 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4196
4197 /* might have been reallocated above */
4198 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4200 data += inval_size;
4201
4202 break;
4203 }
4205 {
4206 Snapshot snap;
4207 char *data;
4208
4209 snap = change->data.snapshot;
4210
4211 sz += sizeof(SnapshotData) +
4212 sizeof(TransactionId) * snap->xcnt +
4213 sizeof(TransactionId) * snap->subxcnt;
4214
4215 /* make sure we have enough space */
4217 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4218 /* might have been reallocated above */
4219 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4220
4221 memcpy(data, snap, sizeof(SnapshotData));
4222 data += sizeof(SnapshotData);
4223
4224 if (snap->xcnt)
4225 {
4226 memcpy(data, snap->xip,
4227 sizeof(TransactionId) * snap->xcnt);
4228 data += sizeof(TransactionId) * snap->xcnt;
4229 }
4230
4231 if (snap->subxcnt)
4232 {
4233 memcpy(data, snap->subxip,
4234 sizeof(TransactionId) * snap->subxcnt);
4235 data += sizeof(TransactionId) * snap->subxcnt;
4236 }
4237 break;
4238 }
4240 {
4241 Size size;
4242 char *data;
4243
4244 /* account for the OIDs of truncated relations */
4245 size = sizeof(Oid) * change->data.truncate.nrelids;
4246 sz += size;
4247
4248 /* make sure we have enough space */
4250
4251 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4252 /* might have been reallocated above */
4253 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4254
4255 memcpy(data, change->data.truncate.relids, size);
4256 data += size;
4257
4258 break;
4259 }
4264 /* ReorderBufferChange contains everything important */
4265 break;
4266 }
4267
4268 ondisk->size = sz;
4269
4270 errno = 0;
4272 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4273 {
4274 int save_errno = errno;
4275
4277
4278 /* if write didn't set errno, assume problem is no disk space */
4280 ereport(ERROR,
4282 errmsg("could not write to data file for XID %u: %m",
4283 txn->xid)));
4284 }
4286
4287 /*
4288 * Keep the transaction's final_lsn up to date with each change we send to
4289 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4290 * only do this on commit and abort records, but that doesn't work if a
4291 * system crash leaves a transaction without its abort record).
4292 *
4293 * Make sure not to move it backwards.
4294 */
4295 if (txn->final_lsn < change->lsn)
4296 txn->final_lsn = change->lsn;
4297
4298 Assert(ondisk->change.action == change->action);
4299}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, CloseTransientFile(), ReorderBufferChange::data, data, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), ReorderBufferTXN::final_lsn, ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::lsn, memcpy(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, pgstat_report_wait_end(), pgstat_report_wait_start(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, write, SnapshotData::xcnt, and ReorderBufferTXN::xid.

Referenced by ReorderBufferSerializeTXN().

◆ ReorderBufferSerializedPath()

◆ ReorderBufferSerializeReserve()

static void ReorderBufferSerializeReserve ( ReorderBuffer *rb,
Size  sz 
)
static

Definition at line 3768 of file reorderbuffer.c.

/*
 * ReorderBufferSerializeReserve
 *		Make sure rb->outbuf provides at least sz bytes.
 *
 * rb: reorder buffer whose outbuf / outbufsize fields are updated.
 * sz: minimum number of bytes the buffer must be able to hold.
 *
 * The buffer only ever grows here: if outbufsize already covers sz, nothing
 * is done. There is no return value.
 */
3769{
/* an outbufsize of zero is treated as "no buffer yet": allocate in rb->context */
3770 if (!rb->outbufsize)
3771 {
3772 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3773 rb->outbufsize = sz;
3774 }
/* recorded size is too small: enlarge and store repalloc()'s result back */
3775 else if (rb->outbufsize < sz)
3776 {
3777 rb->outbuf = repalloc(rb->outbuf, sz);
3778 rb->outbufsize = sz;
3779 }
3780}

References fb(), MemoryContextAlloc(), and repalloc().

Referenced by ReorderBufferRestoreChanges(), and ReorderBufferSerializeChange().

◆ ReorderBufferSerializeTXN()

static void ReorderBufferSerializeTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn
)
static

Definition at line 3993 of file reorderbuffer.c.

3994{
3997 int fd = -1;
3999 Size spilled = 0;
4000 Size size = txn->size;
4001
4002 elog(DEBUG2, "spill %u changes in XID %u to disk",
4003 (uint32) txn->nentries_mem, txn->xid);
4004
4005 /* do the same to all child TXs */
4007 {
4009
4012 }
4013
4014 /* serialize changestream */
4016 {
4017 ReorderBufferChange *change;
4018
4019 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4020
4021 /*
4022 * store in segment in which it belongs by start lsn, don't split over
4023 * multiple segments tho
4024 */
4025 if (fd == -1 ||
4027 {
4028 char path[MAXPGPATH];
4029
4030 if (fd != -1)
4032
4034
4035 /*
4036 * No need to care about TLIs here, only used during a single run,
4037 * so each LSN only maps to a specific WAL record.
4038 */
4040 curOpenSegNo);
4041
4042 /* open segment, create it if necessary */
4043 fd = OpenTransientFile(path,
4045
4046 if (fd < 0)
4047 ereport(ERROR,
4049 errmsg("could not open file \"%s\": %m", path)));
4050 }
4051
4052 ReorderBufferSerializeChange(rb, txn, fd, change);
4053 dlist_delete(&change->node);
4054 ReorderBufferFreeChange(rb, change, false);
4055
4056 spilled++;
4057 }
4058
4059 /* Update the memory counter */
4060 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4061
4062 /* update the statistics iff we have spilled anything */
4063 if (spilled)
4064 {
4065 rb->spillCount += 1;
4066 rb->spillBytes += size;
4067
4068 /* don't consider already serialized transactions */
4069 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4070
4071 /* update the decoding stats */
4073 }
4074
4075 Assert(spilled == txn->nentries_mem);
4077 txn->nentries_mem = 0;
4079
4080 if (fd != -1)
4082}

References Assert, ReorderBufferTXN::changes, CloseTransientFile(), DEBUG2, dlist_container, dlist_delete(), dlist_foreach, dlist_foreach_modify, dlist_is_empty(), elog, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), ReorderBufferChange::lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, OpenTransientFile(), PG_BINARY, RBTXN_IS_SERIALIZED, rbtxn_is_serialized, rbtxn_is_serialized_clear, ReorderBufferChangeMemoryUpdate(), ReorderBufferFreeChange(), ReorderBufferSerializeChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReorderBufferTXN::size, ReorderBufferTXN::subtxns, ReorderBufferTXN::txn_flags, UpdateDecodingStats(), wal_segment_size, ReorderBufferTXN::xid, XLByteInSeg, and XLByteToSeg.

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferIterTXNInit(), and ReorderBufferSerializeTXN().

◆ ReorderBufferSetBaseSnapshot()

void ReorderBufferSetBaseSnapshot ( ReorderBuffer *rb,
TransactionId  xid,
XLogRecPtr  lsn,
Snapshot  snap 
)

Definition at line 3322 of file reorderbuffer.c.

3324{
3325 ReorderBufferTXN *txn;
3326 bool is_new;
3327
3328 Assert(snap != NULL);
3329
3330 /*
3331 * Fetch the transaction to operate on. If we know it's a subtransaction,
3332 * operate on its top-level transaction instead.
3333 */
3334 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3335 if (rbtxn_is_known_subxact(txn))
3336 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3337 NULL, InvalidXLogRecPtr, false);
3338 Assert(txn->base_snapshot == NULL);
3339
3340 txn->base_snapshot = snap;
3341 txn->base_snapshot_lsn = lsn;
3342 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3343
3345}

References Assert, AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_push_tail(), fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), and SnapBuildProcessChange().

◆ ReorderBufferSetRestartPoint()

void ReorderBufferSetRestartPoint ( ReorderBuffer *rb,
XLogRecPtr  ptr 
)

Definition at line 1088 of file reorderbuffer.c.

/*
 * ReorderBufferSetRestartPoint
 *		Record ptr as the reorder buffer's current restart-decoding LSN.
 *
 * rb:  reorder buffer to update.
 * ptr: LSN to store.
 *
 * This only stores the value in rb->current_restart_decoding_lsn; no
 * validation or other bookkeeping is done here. ReorderBufferTXNByXid()
 * copies the stored value into restart_decoding_lsn of each transaction
 * entry it creates.
 */
1089{
1090 rb->current_restart_decoding_lsn = ptr;
1091}

References fb().

Referenced by SnapBuildRestore(), and SnapBuildSerialize().

◆ ReorderBufferSkipPrepare()

void ReorderBufferSkipPrepare ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 2935 of file reorderbuffer.c.

2936{
2937 ReorderBufferTXN *txn;
2938
2939 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2940
2941 /* unknown transaction, nothing to do */
2942 if (txn == NULL)
2943 return;
2944
2945 /* txn must have been marked as a prepared transaction */
2948}

References Assert, fb(), InvalidXLogRecPtr, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferStreamCommit()

static void ReorderBufferStreamCommit ( ReorderBuffer *rb,
ReorderBufferTXN *txn
)
static

Definition at line 1984 of file reorderbuffer.c.

1985{
1986 /* we should only call this for previously streamed transactions */
1988
1990
1991 if (rbtxn_is_prepared(txn))
1992 {
1993 /*
1994 * Note, we send stream prepare even if a concurrent abort is
1995 * detected. See DecodePrepare for more information.
1996 */
1998 rb->stream_prepare(rb, txn, txn->final_lsn);
2000
2001 /*
2002 * This is a PREPARED transaction, part of a two-phase commit. The
2003 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2004 * just truncate txn by removing changes and tuplecids.
2005 */
2006 ReorderBufferTruncateTXN(rb, txn, true);
2007 /* Reset the CheckXidAlive */
2009 }
2010 else
2011 {
2012 rb->stream_commit(rb, txn, txn->final_lsn);
2014 }
2015}

References Assert, CheckXidAlive, fb(), ReorderBufferTXN::final_lsn, InvalidTransactionId, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferCleanupTXN(), ReorderBufferStreamTXN(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferReplay().

◆ ReorderBufferStreamTXN()

static void ReorderBufferStreamTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn
)
static

Definition at line 4338 of file reorderbuffer.c.

4339{
4340 Snapshot snapshot_now;
4341 CommandId command_id;
4342 Size stream_bytes;
4343 bool txn_is_streamed;
4344
4345 /* We can never reach here for a subtransaction. */
4346 Assert(rbtxn_is_toptxn(txn));
4347
4348 /*
4349 * We can't make any assumptions about base snapshot here, similar to what
4350 * ReorderBufferCommit() does. That relies on base_snapshot getting
4351 * transferred from subxact in ReorderBufferCommitChild(), but that was
4352 * not yet called as the transaction is in-progress.
4353 *
4354 * So just walk the subxacts and use the same logic here. But we only need
4355 * to do that once, when the transaction is streamed for the first time.
4356 * After that we need to reuse the snapshot from the previous run.
4357 *
4358 * Unlike DecodeCommit which adds xids of all the subtransactions in
4359 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4360 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4361 * allows the catalog changes made in subtransactions decoded till now to
4362 * be visible.
4363 */
4364 if (txn->snapshot_now == NULL)
4365 {
4367
4368 /* make sure this transaction is streamed for the first time */
4370
4371 /* at the beginning we should have invalid command ID */
4373
4375 {
4377
4380 }
4381
4382 /*
4383 * If this transaction has no snapshot, it didn't make any changes to
4384 * the database till now, so there's nothing to decode.
4385 */
4386 if (txn->base_snapshot == NULL)
4387 {
4388 Assert(txn->ninvalidations == 0);
4389 return;
4390 }
4391
4392 command_id = FirstCommandId;
4393 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4394 txn, command_id);
4395 }
4396 else
4397 {
4398 /* the transaction must have been already streamed */
4400
4401 /*
4402 * Nah, we already have snapshot from the previous streaming run. We
4403 * assume new subxacts can't move the LSN backwards, and so can't beat
4404 * the LSN condition in the previous branch (so no need to walk
4405 * through subxacts again). In fact, we must not do that as we may be
4406 * using snapshot half-way through the subxact.
4407 */
4408 command_id = txn->command_id;
4409
4410 /*
4411 * We can't use txn->snapshot_now directly because after the last
4412 * streaming run, we might have got some new sub-transactions. So we
4413 * need to add them to the snapshot.
4414 */
4415 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4416 txn, command_id);
4417
4418 /* Free the previously copied snapshot. */
4419 Assert(txn->snapshot_now->copied);
4421 txn->snapshot_now = NULL;
4422 }
4423
4424 /*
4425 * Remember this information to be used later to update stats. We can't
4426 * update the stats here as an error while processing the changes would
4427 * lead to the accumulation of stats even though we haven't streamed all
4428 * the changes.
4429 */
4431 stream_bytes = txn->total_size;
4432
4433 /* Process and send the changes to output plugin. */
4434 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4435 command_id, true);
4436
4437 rb->streamCount += 1;
4438 rb->streamBytes += stream_bytes;
4439
4440 /* Don't consider already streamed transaction. */
4441 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4442
4443 /* update the decoding stats */
4445
4447 Assert(txn->nentries == 0);
4448 Assert(txn->nentries_mem == 0);
4449}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::changes, ReorderBufferTXN::command_id, SnapshotData::copied, dlist_container, dlist_foreach, dlist_is_empty(), fb(), FirstCommandId, InvalidCommandId, InvalidXLogRecPtr, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, rbtxn_is_toptxn, ReorderBufferCopySnap(), ReorderBufferFreeSnap(), ReorderBufferProcessTXN(), ReorderBufferTransferSnapToParent(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferProcessPartialChange(), and ReorderBufferStreamCommit().

◆ ReorderBufferToastAppendChunk()

static void ReorderBufferToastAppendChunk ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
Relation  relation,
ReorderBufferChange *change
)
static

Definition at line 4991 of file reorderbuffer.c.

4993{
4996 bool found;
4998 bool isnull;
4999 Pointer chunk;
5000 TupleDesc desc = RelationGetDescr(relation);
5001 Oid chunk_id;
5003
5004 if (txn->toast_hash == NULL)
5006
5007 Assert(IsToastRelation(relation));
5008
5009 newtup = change->data.tp.newtuple;
5010 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5011 Assert(!isnull);
5012 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5013 Assert(!isnull);
5014
5016 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5017
5018 if (!found)
5019 {
5020 Assert(ent->chunk_id == chunk_id);
5021 ent->num_chunks = 0;
5022 ent->last_chunk_seq = 0;
5023 ent->size = 0;
5024 ent->reconstructed = NULL;
5025 dlist_init(&ent->chunks);
5026
5027 if (chunk_seq != 0)
5028 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5029 chunk_seq, chunk_id);
5030 }
5031 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5032 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5033 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5034
5035 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5036 Assert(!isnull);
5037
5038 /* calculate size so we can allocate the right size at once later */
5039 if (!VARATT_IS_EXTENDED(chunk))
5040 chunksize = VARSIZE(chunk) - VARHDRSZ;
5041 else if (VARATT_IS_SHORT(chunk))
5042 /* could happen due to heap_form_tuple doing its thing */
5044 else
5045 elog(ERROR, "unexpected type of toast chunk");
5046
5047 ent->size += chunksize;
5048 ent->last_chunk_seq = chunk_seq;
5049 ent->num_chunks++;
5050 dlist_push_tail(&ent->chunks, &change->node);
5051}

References Assert, ReorderBufferChange::data, DatumGetInt32(), DatumGetObjectId(), DatumGetPointer(), dlist_init(), dlist_push_tail(), elog, ERROR, fastgetattr(), fb(), HASH_ENTER, hash_search(), IsToastRelation(), ReorderBufferChange::newtuple, ReorderBufferChange::node, RelationGetDescr, ReorderBufferToastInitHash(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, VARATT_IS_EXTENDED(), VARATT_IS_SHORT(), VARHDRSZ, VARHDRSZ_SHORT, VARSIZE(), and VARSIZE_SHORT().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastInitHash()

static void ReorderBufferToastInitHash ( ReorderBuffer *rb,
ReorderBufferTXN *txn
)
static

Definition at line 4971 of file reorderbuffer.c.

4972{
4974
4975 Assert(txn->toast_hash == NULL);
4976
4977 hash_ctl.keysize = sizeof(Oid);
4978 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4979 hash_ctl.hcxt = rb->context;
4980 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4982}

References Assert, fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferToastAppendChunk().

◆ ReorderBufferToastReplace()

static void ReorderBufferToastReplace ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
Relation  relation,
ReorderBufferChange *change
)
static

Definition at line 5074 of file reorderbuffer.c.

5076{
5077 TupleDesc desc;
5078 int natt;
5079 Datum *attrs;
5080 bool *isnull;
5081 bool *free;
5083 Relation toast_rel;
5085 MemoryContext oldcontext;
5087 Size old_size;
5088
5089 /* no toast tuples changed */
5090 if (txn->toast_hash == NULL)
5091 return;
5092
5093 /*
5094 * We're going to modify the size of the change. So, to make sure the
5095 * accounting is correct we record the current change size and then after
5096 * re-computing the change we'll subtract the recorded size and then
5097 * re-add the new change size at the end. We don't immediately subtract
5098 * the old size because if there is any error before we add the new size,
5099 * we will release the changes and that will update the accounting info
5100 * (subtracting the size from the counters). And we don't want to
5101 * underflow there.
5102 */
5104
5105 oldcontext = MemoryContextSwitchTo(rb->context);
5106
5107 /* we should only have toast tuples in an INSERT or UPDATE */
5108 Assert(change->data.tp.newtuple);
5109
5110 desc = RelationGetDescr(relation);
5111
5112 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5113 if (!RelationIsValid(toast_rel))
5114 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5115 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5116
5117 toast_desc = RelationGetDescr(toast_rel);
5118
5119 /* should we allocate from stack instead? */
5120 attrs = palloc0_array(Datum, desc->natts);
5121 isnull = palloc0_array(bool, desc->natts);
5122 free = palloc0_array(bool, desc->natts);
5123
5124 newtup = change->data.tp.newtuple;
5125
5126 heap_deform_tuple(newtup, desc, attrs, isnull);
5127
5128 for (natt = 0; natt < desc->natts; natt++)
5129 {
5133
5134 /* va_rawsize is the size of the original datum -- including header */
5135 varatt_external toast_pointer;
5138 varlena *reconstructed;
5139 dlist_iter it;
5140 Size data_done = 0;
5141
5142 if (attr->attisdropped)
5143 continue;
5144
5145 /* not a varlena datatype */
5146 if (attr->attlen != -1)
5147 continue;
5148
5149 /* no data */
5150 if (isnull[natt])
5151 continue;
5152
5153 /* ok, we know we have a toast datum */
5155
5156 /* no need to do anything if the tuple isn't external */
5158 continue;
5159
5161
5162 /*
5163 * Check whether the toast tuple changed, replace if so.
5164 */
5167 &toast_pointer.va_valueid,
5168 HASH_FIND,
5169 NULL);
5170 if (ent == NULL)
5171 continue;
5172
5173 new_datum =
5175
5176 free[natt] = true;
5177
5178 reconstructed = palloc0(toast_pointer.va_rawsize);
5179
5180 ent->reconstructed = reconstructed;
5181
5182 /* stitch toast tuple back together from its parts */
5183 dlist_foreach(it, &ent->chunks)
5184 {
5185 bool cisnull;
5188 Pointer chunk;
5189
5191 ctup = cchange->data.tp.newtuple;
5193
5194 Assert(!cisnull);
5195 Assert(!VARATT_IS_EXTERNAL(chunk));
5196 Assert(!VARATT_IS_SHORT(chunk));
5197
5198 memcpy(VARDATA(reconstructed) + data_done,
5199 VARDATA(chunk),
5200 VARSIZE(chunk) - VARHDRSZ);
5201 data_done += VARSIZE(chunk) - VARHDRSZ;
5202 }
5203 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5204
5205 /* make sure its marked as compressed or not */
5206 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5207 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5208 else
5209 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5210
5212 redirect_pointer.pointer = reconstructed;
5213
5216 sizeof(redirect_pointer));
5217
5219 }
5220
5221 /*
5222 * Build tuple in separate memory & copy tuple back into the tuplebuf
5223 * passed to the output plugin. We can't directly heap_fill_tuple() into
5224 * the tuplebuf because attrs[] will point back into the current content.
5225 */
5226 tmphtup = heap_form_tuple(desc, attrs, isnull);
5227 Assert(newtup->t_len <= MaxHeapTupleSize);
5228 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5229
5230 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5231 newtup->t_len = tmphtup->t_len;
5232
5233 /*
5234 * free resources we won't further need, more persistent stuff will be
5235 * free'd in ReorderBufferToastReset().
5236 */
5237 RelationClose(toast_rel);
5238 pfree(tmphtup);
5239 for (natt = 0; natt < desc->natts; natt++)
5240 {
5241 if (free[natt])
5243 }
5244 pfree(attrs);
5245 pfree(free);
5246 pfree(isnull);
5247
5248 MemoryContextSwitchTo(oldcontext);
5249
5250 /* subtract the old change size */
5252 /* now add the change back, with the correct size */
5254 ReorderBufferChangeSize(change));
5255}

References Assert, CompactAttribute::attisdropped, CompactAttribute::attlen, ReorderBufferChange::data, DatumGetPointer(), dlist_container, dlist_foreach, elog, ERROR, fastgetattr(), fb(), free, HASH_FIND, hash_search(), heap_deform_tuple(), heap_form_tuple(), HEAPTUPLESIZE, INDIRECT_POINTER_SIZE, MaxHeapTupleSize, memcpy(), MemoryContextSwitchTo(), TupleDescData::natts, ReorderBufferChange::newtuple, palloc0(), palloc0_array, pfree(), PointerGetDatum, RelationData::rd_rel, RelationClose(), RelationGetDescr, RelationGetRelationName, RelationIdGetRelation(), RelationIsValid, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SET_VARSIZE(), SET_VARSIZE_COMPRESSED(), SET_VARTAG_EXTERNAL(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, TupleDescCompactAttr(), varatt_external::va_rawsize, varatt_external::va_valueid, VARATT_EXTERNAL_GET_EXTSIZE(), VARATT_EXTERNAL_GET_POINTER, VARATT_EXTERNAL_IS_COMPRESSED(), VARATT_IS_EXTERNAL(), VARATT_IS_SHORT(), VARDATA(), VARDATA_EXTERNAL(), VARHDRSZ, VARSIZE(), and VARTAG_INDIRECT.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastReset()

static void ReorderBufferToastReset ( ReorderBuffer *rb,
ReorderBufferTXN *txn
)
static

Definition at line 5261 of file reorderbuffer.c.

5262{
5265
5266 if (txn->toast_hash == NULL)
5267 return;
5268
5269 /* sequentially walk over the hash and free everything */
5272 {
5274
5275 if (ent->reconstructed != NULL)
5276 pfree(ent->reconstructed);
5277
5278 dlist_foreach_modify(it, &ent->chunks)
5279 {
5280 ReorderBufferChange *change =
5282
5283 dlist_delete(&change->node);
5284 ReorderBufferFreeChange(rb, change, true);
5285 }
5286 }
5287
5289 txn->toast_hash = NULL;
5290}

References dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), hash_seq_init(), hash_seq_search(), ReorderBufferChange::node, pfree(), ReorderBufferFreeChange(), and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferFreeTXN(), ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferTransferSnapToParent()

static void ReorderBufferTransferSnapToParent ( ReorderBufferTXN *txn,
ReorderBufferTXN *subtxn
)
static

Definition at line 1166 of file reorderbuffer.c.

1168{
1169 Assert(subtxn->toplevel_xid == txn->xid);
1170
1171 if (subtxn->base_snapshot != NULL)
1172 {
1173 if (txn->base_snapshot == NULL ||
1174 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1175 {
1176 /*
1177 * If the toplevel transaction already has a base snapshot but
1178 * it's newer than the subxact's, purge it.
1179 */
1180 if (txn->base_snapshot != NULL)
1181 {
1184 }
1185
1186 /*
1187 * The snapshot is now the top transaction's; transfer it, and
1188 * adjust the list position of the top transaction in the list by
1189 * moving it to where the subtransaction is.
1190 */
1191 txn->base_snapshot = subtxn->base_snapshot;
1192 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1193 dlist_insert_before(&subtxn->base_snapshot_node,
1194 &txn->base_snapshot_node);
1195
1196 /*
1197 * The subtransaction doesn't have a snapshot anymore (so it
1198 * mustn't be in the list.)
1199 */
1200 subtxn->base_snapshot = NULL;
1201 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1202 dlist_delete(&subtxn->base_snapshot_node);
1203 }
1204 else
1205 {
1206 /* Base snap of toplevel is fine, so subxact's is not needed */
1207 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1208 dlist_delete(&subtxn->base_snapshot_node);
1209 subtxn->base_snapshot = NULL;
1210 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1211 }
1212 }
1213}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_delete(), dlist_insert_before(), fb(), InvalidXLogRecPtr, SnapBuildSnapDecRefcount(), and ReorderBufferTXN::xid.

Referenced by ReorderBufferAssignChild(), and ReorderBufferStreamTXN().

◆ ReorderBufferTruncateTXN()

static void ReorderBufferTruncateTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
bool  txn_prepared 
)
static

Definition at line 1657 of file reorderbuffer.c.

1658{
1659 dlist_mutable_iter iter;
1660 Size mem_freed = 0;
1661
1662 /* cleanup subtransactions & their changes */
1663 dlist_foreach_modify(iter, &txn->subtxns)
1664 {
1666
1668
1669 /*
1670 * Subtransactions are always associated to the toplevel TXN, even if
1671 * they originally were happening inside another subtxn, so we won't
1672 * ever recurse more than one level deep here.
1673 */
1675 Assert(subtxn->nsubtxns == 0);
1676
1679 }
1680
1681 /* cleanup changes in the txn */
1682 dlist_foreach_modify(iter, &txn->changes)
1683 {
1684 ReorderBufferChange *change;
1685
1686 change = dlist_container(ReorderBufferChange, node, iter.cur);
1687
1688 /* Check we're not mixing changes from different transactions. */
1689 Assert(change->txn == txn);
1690
1691 /* remove the change from its containing list */
1692 dlist_delete(&change->node);
1693
1694 /*
1695 * Instead of updating the memory counter for individual changes, we
1696 * sum up the size of memory to free so we can update the memory
1697 * counter all together below. This saves costs of maintaining the
1698 * max-heap.
1699 */
1701
1702 ReorderBufferFreeChange(rb, change, false);
1703 }
1704
1705 /* Update the memory counter */
1707
1708 if (txn_prepared)
1709 {
1710 /*
1711 * If this is a prepared txn, cleanup the tuplecids we stored for
1712 * decoding catalog snapshot access. They are always stored in the
1713 * toplevel transaction.
1714 */
1715 dlist_foreach_modify(iter, &txn->tuplecids)
1716 {
1717 ReorderBufferChange *change;
1718
1719 change = dlist_container(ReorderBufferChange, node, iter.cur);
1720
1721 /* Check we're not mixing changes from different transactions. */
1722 Assert(change->txn == txn);
1724
1725 /* Remove the change from its containing list. */
1726 dlist_delete(&change->node);
1727
1728 ReorderBufferFreeChange(rb, change, true);
1729 }
1730 }
1731
1732 /*
1733 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1734 * memory. We could also keep the hash table and update it with new ctid
1735 * values, but this seems simpler and good enough for now.
1736 */
1737 if (txn->tuplecid_hash != NULL)
1738 {
1740 txn->tuplecid_hash = NULL;
1741 }
1742
1743 /* If this txn is serialized then clean the disk space. */
1744 if (rbtxn_is_serialized(txn))
1745 {
1748
1749 /*
1750 * We set this flag to indicate if the transaction is ever serialized.
1751 * We need this to accurately update the stats as otherwise the same
1752 * transaction can be counted as serialized multiple times.
1753 */
1755 }
1756
1757 /* also reset the number of entries in the transaction */
1758 txn->nentries_mem = 0;
1759 txn->nentries = 0;
1760}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_is_known_subxact, rbtxn_is_serialized, RBTXN_IS_SERIALIZED_CLEAR, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeChange(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferRestoreCleanup(), ReorderBufferTruncateTXN(), ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecid_hash, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferProcessTXN(), ReorderBufferResetTXN(), ReorderBufferStreamCommit(), and ReorderBufferTruncateTXN().

◆ ReorderBufferTXNByXid()

static ReorderBufferTXN * ReorderBufferTXNByXid ( ReorderBuffer *rb,
TransactionId  xid,
bool  create,
bool *is_new,
XLogRecPtr  lsn,
bool  create_as_top 
)
static

Definition at line 654 of file reorderbuffer.c.

656{
657 ReorderBufferTXN *txn;
659 bool found;
660
662
663 /*
664 * Check the one-entry lookup cache first
665 */
666 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
667 rb->by_txn_last_xid == xid)
668 {
669 txn = rb->by_txn_last_txn;
670
671 if (txn != NULL)
672 {
673 /* found it, and it's valid */
674 if (is_new)
675 *is_new = false;
676 return txn;
677 }
678
679 /*
680 * cached as non-existent, and asked not to create? Then nothing else
681 * to do.
682 */
683 if (!create)
684 return NULL;
685 /* otherwise fall through to create it */
686 }
687
688 /*
689 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
690 * create an entry.
691 */
692
693 /* search the lookup table */
695 hash_search(rb->by_txn,
696 &xid,
697 create ? HASH_ENTER : HASH_FIND,
698 &found);
699 if (found)
700 txn = ent->txn;
701 else if (create)
702 {
703 /* initialize the new entry, if creation was requested */
704 Assert(ent != NULL);
706
708 ent->txn->xid = xid;
709 txn = ent->txn;
710 txn->first_lsn = lsn;
711 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
712
713 if (create_as_top)
714 {
715 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
717 }
718 }
719 else
720 txn = NULL; /* not found and not asked to create */
721
722 /* update cache */
723 rb->by_txn_last_xid = xid;
724 rb->by_txn_last_txn = txn;
725
726 if (is_new)
727 *is_new = !found;
728
729 Assert(!create || txn != NULL);
730 return txn;
731}

References Assert, AssertTXNLsnOrder(), dlist_push_tail(), fb(), ReorderBufferTXN::first_lsn, HASH_ENTER, HASH_FIND, hash_search(), ReorderBufferTXN::node, ReorderBufferAllocTXN(), ReorderBufferTXN::restart_decoding_lsn, TransactionIdIsValid, and XLogRecPtrIsValid.

Referenced by ReorderBufferAbort(), ReorderBufferAddDistributedInvalidations(), ReorderBufferAddInvalidations(), ReorderBufferAddNewTupleCids(), ReorderBufferAssignChild(), ReorderBufferCommit(), ReorderBufferCommitChild(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferGetInvalidations(), ReorderBufferInvalidate(), ReorderBufferPrepare(), ReorderBufferProcessXid(), ReorderBufferQueueChange(), ReorderBufferQueueMessage(), ReorderBufferRememberPrepareInfo(), ReorderBufferSetBaseSnapshot(), ReorderBufferSkipPrepare(), ReorderBufferXidHasBaseSnapshot(), ReorderBufferXidHasCatalogChanges(), and ReorderBufferXidSetCatalogChanges().

◆ ReorderBufferTXNSizeCompare()

static int ReorderBufferTXNSizeCompare ( const pairingheap_node *a,
const pairingheap_node *b,
void *arg
)
static

Definition at line 3785 of file reorderbuffer.c.

3786{
3789
3790 if (ta->size < tb->size)
3791 return -1;
3792 if (ta->size > tb->size)
3793 return 1;
3794 return 0;
3795}

References a, b, fb(), and pairingheap_const_container.

Referenced by ReorderBufferAllocate().

◆ ReorderBufferXidHasBaseSnapshot()

bool ReorderBufferXidHasBaseSnapshot ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 3738 of file reorderbuffer.c.

/*
 * ReorderBufferXidHasBaseSnapshot
 *		Report whether the transaction identified by xid has a base snapshot.
 *
 * rb:  reorder buffer to search.
 * xid: transaction ID to look up.
 *
 * The lookup is done with create = false, so no entry is ever created here.
 * Returns false when xid is unknown. When the entry found is flagged as a
 * known subtransaction, the answer is taken from the entry looked up under
 * its toplevel_xid instead. Otherwise returns true iff base_snapshot is
 * non-NULL.
 */
3739{
3740 ReorderBufferTXN *txn;
3741
3742 txn = ReorderBufferTXNByXid(rb, xid, false,
3743 NULL, InvalidXLogRecPtr, false);
3744
3745 /* transaction isn't known yet, ergo no snapshot */
3746 if (txn == NULL)
3747 return false;
3748
3749 /* a known subtxn? operate on top-level txn instead */
/*
 * NOTE(review): the result of this second lookup is dereferenced below
 * without a NULL check; presumably the top-level entry of a known
 * subtransaction always exists -- confirm.
 */
3750 if (rbtxn_is_known_subxact(txn))
3751 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3752 NULL, InvalidXLogRecPtr, false);
3753
3754 return txn->base_snapshot != NULL;
3755}

References ReorderBufferTXN::base_snapshot, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), SnapBuildDistributeSnapshotAndInval(), and SnapBuildProcessChange().

◆ ReorderBufferXidHasCatalogChanges()

bool ReorderBufferXidHasCatalogChanges ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 3721 of file reorderbuffer.c.

/*
 * ReorderBufferXidHasCatalogChanges
 *		Report whether the transaction identified by xid is marked as having
 *		catalog changes.
 *
 * rb:  reorder buffer to search.
 * xid: transaction ID to look up.
 *
 * The lookup is done with create = false, so no entry is ever created here;
 * an unknown xid yields false. Otherwise the result is whatever
 * rbtxn_has_catalog_changes() reports for the entry found. No redirection
 * to a top-level transaction is performed in this function.
 */
3722{
3723 ReorderBufferTXN *txn;
3724
3725 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3726 false);
/* unknown transaction: nothing recorded, so no catalog changes */
3727 if (txn == NULL)
3728 return false;
3729
3730 return rbtxn_has_catalog_changes(txn);
3731}

References fb(), InvalidXLogRecPtr, rbtxn_has_catalog_changes, and ReorderBufferTXNByXid().

Referenced by SnapBuildXidHasCatalogChanges().

◆ ReorderBufferXidSetCatalogChanges()

void ReorderBufferXidSetCatalogChanges ( ReorderBuffer *rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3649 of file reorderbuffer.c.

3651{
3652 ReorderBufferTXN *txn;
3653
3654 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3655
3656 if (!rbtxn_has_catalog_changes(txn))
3657 {
3659 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3660 }
3661
3662 /*
3663 * Mark top-level transaction as having catalog changes too if one of its
3664 * children has so that the ReorderBufferBuildTupleCidHash can
3665 * conveniently check just top-level transaction and decide whether to
3666 * build the hash table or not.
3667 */
3668 if (rbtxn_is_subtxn(txn))
3669 {
3670 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3671
3672 if (!rbtxn_has_catalog_changes(toptxn))
3673 {
3675 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3676 }
3677 }
3678}

References ReorderBufferTXN::catchange_node, dclist_push_tail(), fb(), rbtxn_get_toptxn, RBTXN_HAS_CATALOG_CHANGES, rbtxn_has_catalog_changes, rbtxn_is_subtxn, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildProcessNewCid(), and xact_decode().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( HTAB * tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId * cmin,
CommandId * cmax 
)

Definition at line 5553 of file reorderbuffer.c.

5557{
5558 ReorderBufferTupleCidKey key;
5559 ReorderBufferTupleCidEnt *ent;
5560 ForkNumber forkno;
5561 BlockNumber blockno;
5562 bool updated_mapping = false;
5563
5564 /*
5565 * Return unresolved if tuplecid_data is not valid. That's because when
5566 * streaming in-progress transactions we may run into tuples with the CID
5567 * before actually decoding them. Think e.g. about INSERT followed by
5568 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5569 * INSERT. So in such cases, we assume the CID is from the future
5570 * command.
5571 */
5572 if (tuplecid_data == NULL)
5573 return false;
5574
5575 /* be careful about padding */
5576 memset(&key, 0, sizeof(key));
5577
5578 Assert(!BufferIsLocal(buffer));
5579
5580 /*
5581 * get relfilelocator from the buffer, no convenient way to access it
5582 * other than that.
5583 */
5584 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5585
5586 /* tuples can only be in the main fork */
5587 Assert(forkno == MAIN_FORKNUM);
5588 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5589
5590 ItemPointerCopy(&htup->t_self,
5591 &key.tid);
5592
5593restart:
5594 ent = (ReorderBufferTupleCidEnt *)
5595 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5596
5597 /*
5598 * failed to find a mapping, check whether the table was rewritten and
5599 * apply mapping if so, but only do that once - there can be no new
5600 * mappings while we are in here since we have to hold a lock on the
5601 * relation.
5602 */
5603 if (ent == NULL && !updated_mapping)
5604 {
5605 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5606 /* now check but don't update for a mapping again */
5607 updated_mapping = true;
5608 goto restart;
5609 }
5610 else if (ent == NULL)
5611 return false;
5612
5613 if (cmin)
5614 *cmin = ent->cmin;
5615 if (cmax)
5616 *cmax = ent->cmax;
5617 return true;
5618}

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ SetupCheckXidLive()

static void SetupCheckXidLive ( TransactionId  xid)
inline static

Definition at line 2050 of file reorderbuffer.c.

2051{
2052 /*
2053 * If the input transaction id is already set as a CheckXidAlive then
2054 * nothing to do.
2055 */
2056 if (TransactionIdEquals(CheckXidAlive, xid))
2057 return;
2058
2059 /*
2060 * setup CheckXidAlive if it's not committed yet. We don't check if the
2061 * xid is aborted. That will happen during catalog access.
2062 */
2063 if (!TransactionIdDidCommit(xid))
2064 CheckXidAlive = xid;
2065 else
2066 CheckXidAlive = InvalidTransactionId;
2067}

References CheckXidAlive, InvalidTransactionId, TransactionIdDidCommit(), and TransactionIdEquals.

Referenced by ReorderBufferProcessTXN().

◆ StartupReorderBuffer()

void StartupReorderBuffer ( void  )

Definition at line 4937 of file reorderbuffer.c.

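4938{
4939 DIR *logical_dir;
4940 struct dirent *logical_de;
4941
4942 logical_dir = AllocateDir(PG_REPLSLOT_DIR);
4943 while ((logical_de = ReadDir(logical_dir, PG_REPLSLOT_DIR)) != NULL)
4944 {
4945 if (strcmp(logical_de->d_name, ".") == 0 ||
4946 strcmp(logical_de->d_name, "..") == 0)
4947 continue;
4948
4949 /* if it cannot be a slot, skip the directory */
4950 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4951 continue;
4952
4953 /*
4954 * ok, has to be a surviving logical slot, iterate and delete
4955 * everything starting with xid-*
4956 */
4957 ReorderBufferCleanupSerializedTXNs(logical_de->d_name);
4958 }
4959 FreeDir(logical_dir);
4960}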

References AllocateDir(), DEBUG2, fb(), FreeDir(), PG_REPLSLOT_DIR, ReadDir(), ReorderBufferCleanupSerializedTXNs(), and ReplicationSlotValidateName().

Referenced by StartupXLOG().

◆ TransactionIdInArray()

static bool TransactionIdInArray ( TransactionId  xid,
TransactionId * xip,
Size  num 
)
static

Definition at line 5452 of file reorderbuffer.c.

5453{
5454 return bsearch(&xid, xip, num,
5455 sizeof(TransactionId), xidComparator) != NULL;
5456}

References fb(), and xidComparator().

Referenced by UpdateLogicalMappings().

◆ UpdateLogicalMappings()

static void UpdateLogicalMappings ( HTAB tuplecid_data,
Oid  relid,
Snapshot  snapshot 
)
static

Definition at line 5475 of file reorderbuffer.c.

5476{
5477 DIR *mapping_dir;
5478 struct dirent *mapping_de;
5479 List *files = NIL;
5480 ListCell *file;
5481 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5482
5483 mapping_dir = AllocateDir(PG_LOGICAL_MAPPINGS_DIR);
5484 while ((mapping_de = ReadDir(mapping_dir, PG_LOGICAL_MAPPINGS_DIR)) != NULL)
5485 {
5486 Oid f_dboid;
5487 Oid f_relid;
5488 TransactionId f_mapped_xid;
5489 TransactionId f_create_xid;
5490 XLogRecPtr f_lsn;
5491 uint32 f_hi,
5492 f_lo;
5493 RewriteMappingFile *f;
5494
5495 if (strcmp(mapping_de->d_name, ".") == 0 ||
5496 strcmp(mapping_de->d_name, "..") == 0)
5497 continue;
5498
5499 /* Ignore files that aren't ours */
5500 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5501 continue;
5502
5503 if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
5504 &f_dboid, &f_relid, &f_hi, &f_lo,
5505 &f_mapped_xid, &f_create_xid) != 6)
5506 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5507
5508 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5509
5510 /* mapping for another database */
5511 if (f_dboid != dboid)
5512 continue;
5513
5514 /* mapping for another relation */
5515 if (f_relid != relid)
5516 continue;
5517
5518 /* did the creating transaction abort? */
5519 if (!TransactionIdDidCommit(f_create_xid))
5520 continue;
5521
5522 /* not for our transaction */
5523 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5524 continue;
5525
5526 /* ok, relevant, queue for apply */
5527 f = palloc_object(RewriteMappingFile);
5528 f->lsn = f_lsn;
5529 strcpy(f->fname, mapping_de->d_name);
5530 files = lappend(files, f);
5531 }
5532 FreeDir(mapping_dir);
5533
5534 /* sort files so we apply them in LSN order */
5535 list_sort(files, file_sort_by_lsn);
5536
5537 foreach(file, files)
5538 {
5539 RewriteMappingFile *f = (RewriteMappingFile *) lfirst(file);
5540
5541 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5542 snapshot->subxip[0]);
5543 ApplyLogicalMappingFile(tuplecid_data, f->fname);
5544 pfree(f);
5545 }
5546}

References AllocateDir(), ApplyLogicalMappingFile(), DEBUG1, elog, ERROR, fb(), file_sort_by_lsn(), RewriteMappingFile::fname, FreeDir(), InvalidOid, IsSharedRelation(), lappend(), lfirst, list_sort(), LOGICAL_REWRITE_FORMAT, RewriteMappingFile::lsn, MyDatabaseId, NIL, palloc_object, pfree(), PG_LOGICAL_MAPPINGS_DIR, ReadDir(), SnapshotData::subxcnt, SnapshotData::subxip, TransactionIdDidCommit(), TransactionIdInArray(), and tuplecid_data.

Referenced by ResolveCminCmaxDuringDecoding().

Variable Documentation

◆ debug_logical_replication_streaming

◆ logical_decoding_work_mem

int logical_decoding_work_mem

Definition at line 226 of file reorderbuffer.c.

Referenced by ReorderBufferCheckMemoryLimit().

◆ max_changes_in_memory

const Size max_changes_in_memory = 4096
static

Definition at line 227 of file reorderbuffer.c.

Referenced by ReorderBufferRestoreChanges().