PostgreSQL Source Code git master
Loading...
Searching...
No Matches
reorderbuffer.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <sys/stat.h>
#include "access/detoast.h"
#include "access/heapam.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "common/int.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"
#include "utils/wait_event.h"
Include dependency graph for reorderbuffer.c:

Go to the source code of this file.

Data Structures

struct  ReorderBufferTXNByIdEnt
 
struct  ReorderBufferTupleCidKey
 
struct  ReorderBufferTupleCidEnt
 
struct  TXNEntryFile
 
struct  ReorderBufferIterTXNEntry
 
struct  ReorderBufferIterTXNState
 
struct  ReorderBufferToastEnt
 
struct  ReorderBufferDiskChange
 
struct  RewriteMappingFile
 

Macros

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
 
#define IsSpecInsert(action)
 
#define IsSpecConfirmOrAbort(action)
 
#define IsInsertOrUpdate(action)
 
#define CHANGES_THRESHOLD   100
 

Typedefs

typedef struct ReorderBufferTXNByIdEnt ReorderBufferTXNByIdEnt
 
typedef struct ReorderBufferTupleCidKey ReorderBufferTupleCidKey
 
typedef struct ReorderBufferTupleCidEnt ReorderBufferTupleCidEnt
 
typedef struct TXNEntryFile TXNEntryFile
 
typedef struct ReorderBufferIterTXNEntry ReorderBufferIterTXNEntry
 
typedef struct ReorderBufferIterTXNState ReorderBufferIterTXNState
 
typedef struct ReorderBufferToastEnt ReorderBufferToastEnt
 
typedef struct ReorderBufferDiskChange ReorderBufferDiskChange
 
typedef struct RewriteMappingFile RewriteMappingFile
 

Functions

static ReorderBufferTXN * ReorderBufferAllocTXN (ReorderBuffer *rb)
 
static void ReorderBufferFreeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static ReorderBufferTXN * ReorderBufferTXNByXid (ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
 
static void ReorderBufferTransferSnapToParent (ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
 
static void AssertTXNLsnOrder (ReorderBuffer *rb)
 
static void ReorderBufferIterTXNInit (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
 
static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferIterTXNFinish (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferExecuteInvalidations (uint32 nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferCheckMemoryLimit (ReorderBuffer *rb)
 
static void ReorderBufferSerializeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferSerializeChange (ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
 
static Size ReorderBufferRestoreChanges (ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
 
static void ReorderBufferRestoreChange (ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
 
static void ReorderBufferRestoreCleanup (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferTruncateTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
 
static void ReorderBufferMaybeMarkTXNStreamed (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static bool ReorderBufferCheckAndTruncateAbortedTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferCleanupSerializedTXNs (const char *slotname)
 
static void ReorderBufferSerializedPath (char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
 
static int ReorderBufferTXNSizeCompare (const pairingheap_node *a, const pairingheap_node *b, void *arg)
 
static void ReorderBufferFreeSnap (ReorderBuffer *rb, Snapshot snap)
 
static Snapshot ReorderBufferCopySnap (ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
 
static bool ReorderBufferCanStream (ReorderBuffer *rb)
 
static bool ReorderBufferCanStartStreaming (ReorderBuffer *rb)
 
static void ReorderBufferStreamTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferStreamCommit (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastInitHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReset (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReplace (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static void ReorderBufferToastAppendChunk (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static Size ReorderBufferChangeSize (ReorderBufferChange *change)
 
static void ReorderBufferChangeMemoryUpdate (ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
 
ReorderBuffer * ReorderBufferAllocate (void)
 
void ReorderBufferFree (ReorderBuffer *rb)
 
ReorderBufferChange * ReorderBufferAllocChange (ReorderBuffer *rb)
 
void ReorderBufferFreeChange (ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
 
HeapTuple ReorderBufferAllocTupleBuf (ReorderBuffer *rb, Size tuple_len)
 
void ReorderBufferFreeTupleBuf (HeapTuple tuple)
 
Oid * ReorderBufferAllocRelids (ReorderBuffer *rb, int nrelids)
 
void ReorderBufferFreeRelids (ReorderBuffer *rb, Oid *relids)
 
static void ReorderBufferProcessPartialChange (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueChange (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueMessage (ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
 
static void AssertChangeLsnOrder (ReorderBufferTXN *txn)
 
ReorderBufferTXN * ReorderBufferGetOldestTXN (ReorderBuffer *rb)
 
TransactionId ReorderBufferGetOldestXmin (ReorderBuffer *rb)
 
void ReorderBufferSetRestartPoint (ReorderBuffer *rb, XLogRecPtr ptr)
 
void ReorderBufferAssignChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
 
void ReorderBufferCommitChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
 
static int ReorderBufferIterCompare (Datum a, Datum b, void *arg)
 
static void ReorderBufferCleanupTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferBuildTupleCidHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void SetupCheckXidLive (TransactionId xid)
 
static void ReorderBufferApplyChange (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyTruncate (ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyMessage (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferSaveTXNSnapshot (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
 
static void ReorderBufferResetTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
 
static void ReorderBufferProcessTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
 
static void ReorderBufferReplay (ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferCommit (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
bool ReorderBufferRememberPrepareInfo (ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferSkipPrepare (ReorderBuffer *rb, TransactionId xid)
 
void ReorderBufferPrepare (ReorderBuffer *rb, TransactionId xid, char *gid)
 
void ReorderBufferFinishPrepared (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
 
void ReorderBufferAbort (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
 
void ReorderBufferAbortOld (ReorderBuffer *rb, TransactionId oldestRunningXid)
 
void ReorderBufferForget (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferInvalidate (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferImmediateInvalidation (ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
 
void ReorderBufferProcessXid (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferAddSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferSetBaseSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferAddNewCommandId (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
 
void ReorderBufferAddNewTupleCids (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
 
static void ReorderBufferQueueInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferAccumulateInvalidations (SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
 
void ReorderBufferAddInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferAddDistributedInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferXidSetCatalogChanges (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
TransactionId * ReorderBufferGetCatalogChangesXacts (ReorderBuffer *rb)
 
bool ReorderBufferXidHasCatalogChanges (ReorderBuffer *rb, TransactionId xid)
 
bool ReorderBufferXidHasBaseSnapshot (ReorderBuffer *rb, TransactionId xid)
 
static void ReorderBufferSerializeReserve (ReorderBuffer *rb, Size sz)
 
static ReorderBufferTXN * ReorderBufferLargestTXN (ReorderBuffer *rb)
 
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN (ReorderBuffer *rb)
 
void StartupReorderBuffer (void)
 
static void ApplyLogicalMappingFile (HTAB *tuplecid_data, const char *fname)
 
static bool TransactionIdInArray (TransactionId xid, TransactionId *xip, Size num)
 
static int file_sort_by_lsn (const ListCell *a_p, const ListCell *b_p)
 
static void UpdateLogicalMappings (HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
 
bool ResolveCminCmaxDuringDecoding (HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
uint32 ReorderBufferGetInvalidations (ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
 

Variables

int logical_decoding_work_mem
 
static const Size max_changes_in_memory = 4096
 
int debug_logical_replication_streaming = DEBUG_LOGICAL_REP_STREAMING_BUFFERED
 

Macro Definition Documentation

◆ CHANGES_THRESHOLD

#define CHANGES_THRESHOLD   100

◆ IsInsertOrUpdate

#define IsInsertOrUpdate (   action)
Value:
( \
(((action) == REORDER_BUFFER_CHANGE_INSERT) || \
((action) == REORDER_BUFFER_CHANGE_UPDATE) || \
((action) == REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT)) \
)
@ REORDER_BUFFER_CHANGE_INSERT
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT
@ REORDER_BUFFER_CHANGE_UPDATE

Definition at line 207 of file reorderbuffer.c.

325{
326 ReorderBuffer *buffer;
329
331
332 /* allocate memory in own context, to have better accountability */
334 "ReorderBuffer",
336
337 buffer =
339
340 memset(&hash_ctl, 0, sizeof(hash_ctl));
341
342 buffer->context = new_ctx;
343
345 "Change",
347 sizeof(ReorderBufferChange));
348
350 "TXN",
352 sizeof(ReorderBufferTXN));
353
354 /*
355 * To minimize memory fragmentation caused by long-running transactions
356 * with changes spanning multiple memory blocks, we use a single
357 * fixed-size memory block for decoded tuple storage. The performance
358 * testing showed that the default memory block size maintains logical
359 * decoding performance without causing fragmentation due to concurrent
360 * transactions. One might think that we can use the max size as
361 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
362 * the memory fragmentation.
363 */
365 "Tuples",
369
370 hash_ctl.keysize = sizeof(TransactionId);
371 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
372 hash_ctl.hcxt = buffer->context;
373
374 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
376
378 buffer->by_txn_last_txn = NULL;
379
380 buffer->outbuf = NULL;
381 buffer->outbufsize = 0;
382 buffer->size = 0;
383
384 /* txn_heap is ordered by transaction size */
386
387 buffer->spillTxns = 0;
388 buffer->spillCount = 0;
389 buffer->spillBytes = 0;
390 buffer->streamTxns = 0;
391 buffer->streamCount = 0;
392 buffer->streamBytes = 0;
393 buffer->memExceededCount = 0;
394 buffer->totalTxns = 0;
395 buffer->totalBytes = 0;
396
398
399 dlist_init(&buffer->toplevel_by_lsn);
401 dclist_init(&buffer->catchange_txns);
402
403 /*
404 * Ensure there's no stale data from prior uses of this slot, in case some
405 * prior exit avoided calling ReorderBufferFree. Failure to do this can
406 * produce duplicated txns, and it's very cheap if there's nothing there.
407 */
409
410 return buffer;
411}
412
413/*
414 * Free a ReorderBuffer
415 */
416void
418{
419 MemoryContext context = rb->context;
420
421 /*
422 * We free separately allocated data by entirely scrapping reorderbuffer's
423 * memory context.
424 */
425 MemoryContextDelete(context);
426
427 /* Free disk space used by unconsumed reorder buffers */
429}
430
431/*
432 * Allocate a new ReorderBufferTXN.
433 */
434static ReorderBufferTXN *
436{
437 ReorderBufferTXN *txn;
438
439 txn = (ReorderBufferTXN *)
440 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
441
442 memset(txn, 0, sizeof(ReorderBufferTXN));
443
444 dlist_init(&txn->changes);
445 dlist_init(&txn->tuplecids);
446 dlist_init(&txn->subtxns);
447
448 /* InvalidCommandId is not zero, so set it explicitly */
451
452 return txn;
453}
454
455/*
456 * Free a ReorderBufferTXN.
457 */
458static void
460{
461 /* clean the lookup cache if we were cached (quite likely) */
462 if (rb->by_txn_last_xid == txn->xid)
463 {
464 rb->by_txn_last_xid = InvalidTransactionId;
465 rb->by_txn_last_txn = NULL;
466 }
467
468 /* free data that's contained */
469
470 if (txn->gid != NULL)
471 {
472 pfree(txn->gid);
473 txn->gid = NULL;
474 }
475
476 if (txn->tuplecid_hash != NULL)
477 {
479 txn->tuplecid_hash = NULL;
480 }
481
482 if (txn->invalidations)
483 {
484 pfree(txn->invalidations);
485 txn->invalidations = NULL;
486 }
487
489 {
492 }
493
494 /* Reset the toast hash */
496
497 /* All changes must be deallocated */
498 Assert(txn->size == 0);
499
500 pfree(txn);
501}
502
503/*
504 * Allocate a ReorderBufferChange.
505 */
508{
509 ReorderBufferChange *change;
510
511 change = (ReorderBufferChange *)
512 MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange));
513
514 memset(change, 0, sizeof(ReorderBufferChange));
515 return change;
516}
517
518/*
519 * Free a ReorderBufferChange and update memory accounting, if requested.
520 */
521void
523 bool upd_mem)
524{
525 /* update memory accounting info */
526 if (upd_mem)
529
530 /* free contained data */
531 switch (change->action)
532 {
537 if (change->data.tp.newtuple)
538 {
540 change->data.tp.newtuple = NULL;
541 }
542
543 if (change->data.tp.oldtuple)
544 {
546 change->data.tp.oldtuple = NULL;
547 }
548 break;
550 if (change->data.msg.prefix != NULL)
551 pfree(change->data.msg.prefix);
552 change->data.msg.prefix = NULL;
553 if (change->data.msg.message != NULL)
554 pfree(change->data.msg.message);
555 change->data.msg.message = NULL;
556 break;
558 if (change->data.inval.invalidations)
559 pfree(change->data.inval.invalidations);
560 change->data.inval.invalidations = NULL;
561 break;
563 if (change->data.snapshot)
564 {
566 change->data.snapshot = NULL;
567 }
568 break;
569 /* no data in addition to the struct itself */
571 if (change->data.truncate.relids != NULL)
572 {
574 change->data.truncate.relids = NULL;
575 }
576 break;
581 break;
582 }
583
584 pfree(change);
585}
586
587/*
588 * Allocate a HeapTuple fitting a tuple of size tuple_len (excluding header
589 * overhead).
590 */
593{
594 HeapTuple tuple;
596
597 alloc_len = tuple_len + SizeofHeapTupleHeader;
598
599 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
601 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
602
603 return tuple;
604}
605
606/*
607 * Free a HeapTuple returned by ReorderBufferAllocTupleBuf().
608 */
609void
611{
612 pfree(tuple);
613}
614
615/*
616 * Allocate an array for relids of truncated relations.
617 *
618 * We use the global memory context (for the whole reorder buffer), because
619 * none of the existing ones seems like a good match (some are SLAB, so we
620 * can't use those, and tup_context is meant for tuple data, not relids). We
621 * could add yet another context, but it seems like overkill - TRUNCATE is
622 * not a particularly common operation, so it does not seem worth it.
623 */
624Oid *
626{
627 Oid *relids;
629
630 alloc_len = sizeof(Oid) * nrelids;
631
632 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
633
634 return relids;
635}
636
637/*
638 * Free an array of relids.
639 */
640void
642{
643 pfree(relids);
644}
645
646/*
647 * Return the ReorderBufferTXN from the given buffer, specified by Xid.
648 * If create is true, and a transaction doesn't already exist, create it
649 * (with the given LSN, and as top transaction if that's specified);
650 * when this happens, is_new is set to true.
651 */
652static ReorderBufferTXN *
654 bool *is_new, XLogRecPtr lsn, bool create_as_top)
655{
656 ReorderBufferTXN *txn;
658 bool found;
659
661
662 /*
663 * Check the one-entry lookup cache first
664 */
665 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
666 rb->by_txn_last_xid == xid)
667 {
668 txn = rb->by_txn_last_txn;
669
670 if (txn != NULL)
671 {
672 /* found it, and it's valid */
673 if (is_new)
674 *is_new = false;
675 return txn;
676 }
677
678 /*
679 * cached as non-existent, and asked not to create? Then nothing else
680 * to do.
681 */
682 if (!create)
683 return NULL;
684 /* otherwise fall through to create it */
685 }
686
687 /*
688 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
689 * create an entry.
690 */
691
692 /* search the lookup table */
694 hash_search(rb->by_txn,
695 &xid,
696 create ? HASH_ENTER : HASH_FIND,
697 &found);
698 if (found)
699 txn = ent->txn;
700 else if (create)
701 {
702 /* initialize the new entry, if creation was requested */
703 Assert(ent != NULL);
705
707 ent->txn->xid = xid;
708 txn = ent->txn;
709 txn->first_lsn = lsn;
710 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
711
712 if (create_as_top)
713 {
714 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
716 }
717 }
718 else
719 txn = NULL; /* not found and not asked to create */
720
721 /* update cache */
722 rb->by_txn_last_xid = xid;
723 rb->by_txn_last_txn = txn;
724
725 if (is_new)
726 *is_new = !found;
727
728 Assert(!create || txn != NULL);
729 return txn;
730}
731
732/*
733 * Record the partial change for the streaming of in-progress transactions. We
734 * can stream only complete changes so if we have a partial change like toast
735 * table insert or speculative insert then we mark such a 'txn' so that it
736 * can't be streamed. We also ensure that if the changes in such a 'txn' can
737 * be streamed and are above logical_decoding_work_mem threshold then we stream
738 * them as soon as we have a complete change.
739 */
740static void
742 ReorderBufferChange *change,
743 bool toast_insert)
744{
745 ReorderBufferTXN *toptxn;
746
747 /*
748 * The partial changes need to be processed only while streaming
749 * in-progress transactions.
750 */
752 return;
753
754 /* Get the top transaction. */
755 toptxn = rbtxn_get_toptxn(txn);
756
757 /*
758 * Indicate a partial change for toast inserts. The change will be
759 * considered as complete once we get the insert or update on the main
760 * table and we are sure that the pending toast chunks are not required
761 * anymore.
762 *
763 * If we allow streaming when there are pending toast chunks then such
764 * chunks won't be released till the insert (multi_insert) is complete and
765 * we expect the txn to have streamed all changes after streaming. This
766 * restriction is mainly to ensure the correctness of streamed
767 * transactions and it doesn't seem worth uplifting such a restriction
768 * just to allow this case because anyway we will stream the transaction
769 * once such an insert is complete.
770 */
771 if (toast_insert)
773 else if (rbtxn_has_partial_change(toptxn) &&
774 IsInsertOrUpdate(change->action) &&
777
778 /*
779 * Indicate a partial change for speculative inserts. The change will be
780 * considered as complete once we get the speculative confirm or abort
781 * token.
782 */
783 if (IsSpecInsert(change->action))
785 else if (rbtxn_has_partial_change(toptxn) &&
788
789 /*
790 * Stream the transaction if it is serialized before and the changes are
791 * now complete in the top-level transaction.
792 *
793 * The reason for doing the streaming of such a transaction as soon as we
794 * get the complete change for it is that previously it would have reached
795 * the memory threshold and wouldn't get streamed because of incomplete
796 * changes. Delaying such transactions would increase apply lag for them.
797 */
799 !(rbtxn_has_partial_change(toptxn)) &&
800 rbtxn_is_serialized(txn) &&
802 ReorderBufferStreamTXN(rb, toptxn);
803}
804
805/*
806 * Queue a change into a transaction so it can be replayed upon commit or will be
807 * streamed when we reach logical_decoding_work_mem threshold.
808 */
809void
811 ReorderBufferChange *change, bool toast_insert)
812{
813 ReorderBufferTXN *txn;
814
815 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
816
817 /*
818 * If we have detected that the transaction is aborted while streaming the
819 * previous changes or by checking its CLOG, there is no point in
820 * collecting further changes for it.
821 */
822 if (rbtxn_is_aborted(txn))
823 {
824 /*
825 * We don't need to update memory accounting for this change as we
826 * have not added it to the queue yet.
827 */
828 ReorderBufferFreeChange(rb, change, false);
829 return;
830 }
831
832 /*
833 * The changes that are sent downstream are considered streamable. We
834 * remember such transactions so that only those will later be considered
835 * for streaming.
836 */
837 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
843 {
844 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
845
847 }
848
849 change->lsn = lsn;
850 change->txn = txn;
851
853 dlist_push_tail(&txn->changes, &change->node);
854 txn->nentries++;
855 txn->nentries_mem++;
856
857 /* update memory accounting information */
860
861 /* process partial change */
863
864 /* check the memory limits and evict something if needed */
866}
867
868/*
869 * A transactional message is queued to be processed upon commit and a
870 * non-transactional message gets processed immediately.
871 */
872void
875 bool transactional, const char *prefix,
876 Size message_size, const char *message)
877{
878 if (transactional)
879 {
880 MemoryContext oldcontext;
881 ReorderBufferChange *change;
882
884
885 /*
886 * We don't expect snapshots for transactional changes - we'll use the
887 * snapshot derived later during apply (unless the change gets
888 * skipped).
889 */
890 Assert(!snap);
891
892 oldcontext = MemoryContextSwitchTo(rb->context);
893
896 change->data.msg.prefix = pstrdup(prefix);
897 change->data.msg.message_size = message_size;
898 change->data.msg.message = palloc(message_size);
899 memcpy(change->data.msg.message, message, message_size);
900
901 ReorderBufferQueueChange(rb, xid, lsn, change, false);
902
903 MemoryContextSwitchTo(oldcontext);
904 }
905 else
906 {
907 ReorderBufferTXN *txn = NULL;
908 volatile Snapshot snapshot_now = snap;
909
910 /* Non-transactional changes require a valid snapshot. */
911 Assert(snapshot_now);
912
913 if (xid != InvalidTransactionId)
914 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
915
916 /* setup snapshot to allow catalog access */
917 SetupHistoricSnapshot(snapshot_now, NULL);
918 PG_TRY();
919 {
920 rb->message(rb, txn, lsn, false, prefix, message_size, message);
921
923 }
924 PG_CATCH();
925 {
927 PG_RE_THROW();
928 }
929 PG_END_TRY();
930 }
931}
932
933/*
934 * AssertTXNLsnOrder
935 * Verify LSN ordering of transaction lists in the reorderbuffer
936 *
937 * Other LSN-related invariants are checked too.
938 *
939 * No-op if assertions are not in use.
940 */
941static void
943{
944#ifdef USE_ASSERT_CHECKING
945 LogicalDecodingContext *ctx = rb->private_data;
946 dlist_iter iter;
949
950 /*
951 * Skip the verification if we haven't yet reached the LSN at which we start
952 * decoding the contents of transactions, because until we reach the
953 * LSN, we could have transactions that don't have the association between
954 * the top-level transaction and subtransaction yet and consequently have
955 * the same LSN. We don't guarantee this association until we try to
956 * decode the actual contents of transaction. The ordering of the records
957 * prior to the start_decoding_at LSN should have been checked before the
958 * restart.
959 */
961 return;
962
963 dlist_foreach(iter, &rb->toplevel_by_lsn)
964 {
966 iter.cur);
967
968 /* start LSN must be set */
969 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
970
971 /* If there is an end LSN, it must be higher than start LSN */
972 if (XLogRecPtrIsValid(cur_txn->end_lsn))
973 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
974
975 /* Current initial LSN must be strictly higher than previous */
978
979 /* known-as-subtxn txns must not be listed */
981
982 prev_first_lsn = cur_txn->first_lsn;
983 }
984
985 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
986 {
988 base_snapshot_node,
989 iter.cur);
990
991 /* base snapshot (and its LSN) must be set */
992 Assert(cur_txn->base_snapshot != NULL);
993 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
994
995 /* current LSN must be strictly higher than previous */
997 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
998
999 /* known-as-subtxn txns must not be listed */
1001
1002 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1003 }
1004#endif
1005}
1006
1007/*
1008 * AssertChangeLsnOrder
1009 *
1010 * Check ordering of changes in the (sub)transaction.
1011 */
1012static void
1014{
1015#ifdef USE_ASSERT_CHECKING
1016 dlist_iter iter;
1018
1019 dlist_foreach(iter, &txn->changes)
1020 {
1022
1024
1027 Assert(txn->first_lsn <= cur_change->lsn);
1028
1029 if (XLogRecPtrIsValid(txn->end_lsn))
1030 Assert(cur_change->lsn <= txn->end_lsn);
1031
1033
1034 prev_lsn = cur_change->lsn;
1035 }
1036#endif
1037}
1038
1039/*
1040 * ReorderBufferGetOldestTXN
1041 * Return oldest transaction in reorderbuffer
1042 */
1045{
1046 ReorderBufferTXN *txn;
1047
1049
1050 if (dlist_is_empty(&rb->toplevel_by_lsn))
1051 return NULL;
1052
1053 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1054
1057 return txn;
1058}
1059
1060/*
1061 * ReorderBufferGetOldestXmin
1062 * Return oldest Xmin in reorderbuffer
1063 *
1064 * Returns oldest possibly running Xid from the point of view of snapshots
1065 * used in the transactions kept by reorderbuffer, or InvalidTransactionId if
1066 * there are none.
1067 *
1068 * Since snapshots are assigned monotonically, this equals the Xmin of the
1069 * base snapshot with minimal base_snapshot_lsn.
1070 */
1073{
1074 ReorderBufferTXN *txn;
1075
1077
1078 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1079 return InvalidTransactionId;
1080
1081 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1082 &rb->txns_by_base_snapshot_lsn);
1083 return txn->base_snapshot->xmin;
1084}
1085
1086void
1088{
1089 rb->current_restart_decoding_lsn = ptr;
1090}
1091
1092/*
1093 * ReorderBufferAssignChild
1094 *
1095 * Make note that we know that subxid is a subtransaction of xid, seen as of
1096 * the given lsn.
1097 */
1098void
1100 TransactionId subxid, XLogRecPtr lsn)
1101{
1102 ReorderBufferTXN *txn;
1104 bool new_top;
1105 bool new_sub;
1106
1107 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1108 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1109
1110 if (!new_sub)
1111 {
1113 {
1114 /* already associated, nothing to do */
1115 return;
1116 }
1117 else
1118 {
1119 /*
1120 * We already saw this transaction, but initially added it to the
1121 * list of top-level txns. Now that we know it's not top-level,
1122 * remove it from there.
1123 */
1124 dlist_delete(&subtxn->node);
1125 }
1126 }
1127
1128 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1129 subtxn->toplevel_xid = xid;
1130 Assert(subtxn->nsubtxns == 0);
1131
1132 /* set the reference to top-level transaction */
1133 subtxn->toptxn = txn;
1134
1135 /* add to subtransaction list */
1136 dlist_push_tail(&txn->subtxns, &subtxn->node);
1137 txn->nsubtxns++;
1138
1139 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1141
1142 /* Verify LSN-ordering invariant */
1144}
1145
1146/*
1147 * ReorderBufferTransferSnapToParent
1148 * Transfer base snapshot from subtxn to top-level txn, if needed
1149 *
1150 * This is done if the top-level txn doesn't have a base snapshot, or if the
1151 * subtxn's base snapshot has an earlier LSN than the top-level txn's base
1152 * snapshot's LSN. This can happen if there are no changes in the toplevel
1153 * txn but there are some in the subtxn, or the first change in subtxn has
1154 * earlier LSN than first change in the top-level txn and we learned about
1155 * their kinship only now.
1156 *
1157 * The subtransaction's snapshot is cleared regardless of the transfer
1158 * happening, since it's not needed anymore in either case.
1159 *
1160 * We do this as soon as we become aware of their kinship, to avoid queueing
1161 * extra snapshots to txns known-as-subtxns -- only top-level txns will
1162 * receive further snapshots.
1163 */
1164static void
1167{
1168 Assert(subtxn->toplevel_xid == txn->xid);
1169
1170 if (subtxn->base_snapshot != NULL)
1171 {
1172 if (txn->base_snapshot == NULL ||
1173 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1174 {
1175 /*
1176 * If the toplevel transaction already has a base snapshot but
1177 * it's newer than the subxact's, purge it.
1178 */
1179 if (txn->base_snapshot != NULL)
1180 {
1183 }
1184
1185 /*
1186 * The snapshot is now the top transaction's; transfer it, and
1187 * adjust the list position of the top transaction in the list by
1188 * moving it to where the subtransaction is.
1189 */
1190 txn->base_snapshot = subtxn->base_snapshot;
1191 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1192 dlist_insert_before(&subtxn->base_snapshot_node,
1193 &txn->base_snapshot_node);
1194
1195 /*
1196 * The subtransaction doesn't have a snapshot anymore (so it
1197 * mustn't be in the list.)
1198 */
1199 subtxn->base_snapshot = NULL;
1200 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1201 dlist_delete(&subtxn->base_snapshot_node);
1202 }
1203 else
1204 {
1205 /* Base snap of toplevel is fine, so subxact's is not needed */
1206 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1207 dlist_delete(&subtxn->base_snapshot_node);
1208 subtxn->base_snapshot = NULL;
1209 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1210 }
1211 }
1212}
1213
1214/*
1215 * Associate a subtransaction with its toplevel transaction at commit
1216 * time. There may be no further changes added after this.
1217 */
1218void
1220 TransactionId subxid, XLogRecPtr commit_lsn,
1221 XLogRecPtr end_lsn)
1222{
1224
1225 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1226 InvalidXLogRecPtr, false);
1227
1228 /*
1229 * No need to do anything if that subtxn didn't contain any changes
1230 */
1231 if (!subtxn)
1232 return;
1233
1234 subtxn->final_lsn = commit_lsn;
1235 subtxn->end_lsn = end_lsn;
1236
1237 /*
1238 * Assign this subxact as a child of the toplevel xact (no-op if already
1239 * done.)
1240 */
1242}
1243
1244
1245/*
1246 * Support for efficiently iterating over a transaction's and its
1247 * subtransactions' changes.
1248 *
1249 * We do this by doing a k-way merge between transactions/subtransactions. For that
1250 * we model the current heads of the different transactions as a binary heap
1251 * so we easily know which (sub-)transaction has the change with the smallest
1252 * lsn next.
1253 *
1254 * We assume the changes in individual transactions are already sorted by LSN.
1255 */
1256
1257/*
1258 * Binary heap comparison function.
1259 */
1260static int
1262{
1264 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1265 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1266
1267 if (pos_a < pos_b)
1268 return 1;
1269 else if (pos_a == pos_b)
1270 return 0;
1271 return -1;
1272}
1273
1274/*
1275 * Allocate & initialize an iterator which iterates in lsn order over a
1276 * transaction and all its subtransactions.
1277 *
1278 * Note: The iterator state is returned through iter_state parameter rather
1279 * than the function's return value. This is because the state gets cleaned up
1280 * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1281 * back the state even if this function throws an exception.
1282 */
1283static void
1286{
1287 Size nr_txns = 0;
1290 int32 off;
1291
1292 *iter_state = NULL;
1293
1294 /* Check ordering of changes in the toplevel transaction. */
1296
1297 /*
1298 * Calculate the size of our heap: one element for every transaction that
1299 * contains changes. (Besides the transactions already in the reorder
1300 * buffer, we count the one we were directly passed.)
1301 */
1302 if (txn->nentries > 0)
1303 nr_txns++;
1304
1306 {
1308
1310
1311 /* Check ordering of changes in this subtransaction. */
1313
1314 if (cur_txn->nentries > 0)
1315 nr_txns++;
1316 }
1317
1318 /* allocate iteration state */
1320 MemoryContextAllocZero(rb->context,
1322 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1323
1324 state->nr_txns = nr_txns;
1325 dlist_init(&state->old_change);
1326
1327 for (off = 0; off < state->nr_txns; off++)
1328 {
1329 state->entries[off].file.vfd = -1;
1330 state->entries[off].segno = 0;
1331 }
1332
1333 /* allocate heap */
1334 state->heap = binaryheap_allocate(state->nr_txns,
1336 state);
1337
1338 /* Now that the state fields are initialized, it is safe to return it. */
1339 *iter_state = state;
1340
1341 /*
1342 * Now insert items into the binary heap, in an unordered fashion. (We
1343 * will run a heap assembly step at the end; this is more efficient.)
1344 */
1345
1346 off = 0;
1347
1348 /* add toplevel transaction if it contains changes */
1349 if (txn->nentries > 0)
1350 {
1352
1353 if (rbtxn_is_serialized(txn))
1354 {
1355 /* serialize remaining changes */
1357 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1358 &state->entries[off].segno);
1359 }
1360
1362 &txn->changes);
1363
1364 state->entries[off].lsn = cur_change->lsn;
1365 state->entries[off].change = cur_change;
1366 state->entries[off].txn = txn;
1367
1369 }
1370
1371 /* add subtransactions if they contain changes */
1373 {
1375
1377
1378 if (cur_txn->nentries > 0)
1379 {
1381
1383 {
1384 /* serialize remaining changes */
1387 &state->entries[off].file,
1388 &state->entries[off].segno);
1389 }
1391 &cur_txn->changes);
1392
1393 state->entries[off].lsn = cur_change->lsn;
1394 state->entries[off].change = cur_change;
1395 state->entries[off].txn = cur_txn;
1396
1398 }
1399 }
1400
1401 /* assemble a valid binary heap */
1402 binaryheap_build(state->heap);
1403}
1404
1405/*
1406 * Return the next change when iterating over a transaction and its
1407 * subtransactions.
1408 *
1409 * Returns NULL when no further changes exist.
1410 */
1411static ReorderBufferChange *
1413{
1414 ReorderBufferChange *change;
1416 int32 off;
1417
1418 /* nothing there anymore */
1419 if (binaryheap_empty(state->heap))
1420 return NULL;
1421
1422 off = DatumGetInt32(binaryheap_first(state->heap));
1423 entry = &state->entries[off];
1424
1425 /* free memory we might have "leaked" in the previous *Next call */
1426 if (!dlist_is_empty(&state->old_change))
1427 {
1428 change = dlist_container(ReorderBufferChange, node,
1429 dlist_pop_head_node(&state->old_change));
1430 ReorderBufferFreeChange(rb, change, true);
1431 Assert(dlist_is_empty(&state->old_change));
1432 }
1433
1434 change = entry->change;
1435
1436 /*
1437 * update heap with information about which transaction has the next
1438 * relevant change in LSN order
1439 */
1440
1441 /* there are in-memory changes */
1442 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1443 {
1444 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1447
1448 /* txn stays the same */
1449 state->entries[off].lsn = next_change->lsn;
1450 state->entries[off].change = next_change;
1451
1453 return change;
1454 }
1455
1456 /* try to load changes from disk */
1457 if (entry->txn->nentries != entry->txn->nentries_mem)
1458 {
1459 /*
1460 * Ugly: restoring changes will reuse *Change records, thus delete the
1461 * current one from the per-tx list and only free in the next call.
1462 */
1463 dlist_delete(&change->node);
1464 dlist_push_tail(&state->old_change, &change->node);
1465
1466 /*
1467 * Update the total bytes processed by the txn for which we are
1468 * releasing the current set of changes and restoring the new set of
1469 * changes.
1470 */
1471 rb->totalBytes += entry->txn->size;
1472 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1473 &state->entries[off].segno))
1474 {
1475 /* successfully restored changes from disk */
1478 &entry->txn->changes);
1479
1480 elog(DEBUG2, "restored %u/%u changes from disk",
1481 (uint32) entry->txn->nentries_mem,
1482 (uint32) entry->txn->nentries);
1483
1484 Assert(entry->txn->nentries_mem);
1485 /* txn stays the same */
1486 state->entries[off].lsn = next_change->lsn;
1487 state->entries[off].change = next_change;
1489
1490 return change;
1491 }
1492 }
1493
1494 /* ok, no changes there anymore, remove */
1496
1497 return change;
1498}
1499
1500/*
1501 * Deallocate the iterator
1502 */
1503static void
1506{
1507 int32 off;
1508
1509 for (off = 0; off < state->nr_txns; off++)
1510 {
1511 if (state->entries[off].file.vfd != -1)
1512 FileClose(state->entries[off].file.vfd);
1513 }
1514
1515 /* free memory we might have "leaked" in the last *Next call */
1516 if (!dlist_is_empty(&state->old_change))
1517 {
1518 ReorderBufferChange *change;
1519
1520 change = dlist_container(ReorderBufferChange, node,
1521 dlist_pop_head_node(&state->old_change));
1522 ReorderBufferFreeChange(rb, change, true);
1523 Assert(dlist_is_empty(&state->old_change));
1524 }
1525
1526 binaryheap_free(state->heap);
1527 pfree(state);
1528}
1529
1530/*
1531 * Cleanup the contents of a transaction, usually after the transaction
1532 * committed or aborted.
1533 */
1534static void
1536{
1537 bool found;
1538 dlist_mutable_iter iter;
1539 Size mem_freed = 0;
1540
1541 /* cleanup subtransactions & their changes */
1542 dlist_foreach_modify(iter, &txn->subtxns)
1543 {
1545
1547
1548 /*
1549 * Subtransactions are always associated to the toplevel TXN, even if
1550 * they originally were happening inside another subtxn, so we won't
1551 * ever recurse more than one level deep here.
1552 */
1554 Assert(subtxn->nsubtxns == 0);
1555
1557 }
1558
1559 /* cleanup changes in the txn */
1560 dlist_foreach_modify(iter, &txn->changes)
1561 {
1562 ReorderBufferChange *change;
1563
1564 change = dlist_container(ReorderBufferChange, node, iter.cur);
1565
1566 /* Check we're not mixing changes from different transactions. */
1567 Assert(change->txn == txn);
1568
1569 /*
1570 * Instead of updating the memory counter for individual changes, we
1571 * sum up the size of memory to free so we can update the memory
1572 * counter all together below. This saves costs of maintaining the
1573 * max-heap.
1574 */
1576
1577 ReorderBufferFreeChange(rb, change, false);
1578 }
1579
1580 /* Update the memory counter */
1582
1583 /*
1584 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1585 * They are always stored in the toplevel transaction.
1586 */
1587 dlist_foreach_modify(iter, &txn->tuplecids)
1588 {
1589 ReorderBufferChange *change;
1590
1591 change = dlist_container(ReorderBufferChange, node, iter.cur);
1592
1593 /* Check we're not mixing changes from different transactions. */
1594 Assert(change->txn == txn);
1596
1597 ReorderBufferFreeChange(rb, change, true);
1598 }
1599
1600 /*
1601 * Cleanup the base snapshot, if set.
1602 */
1603 if (txn->base_snapshot != NULL)
1604 {
1607 }
1608
1609 /*
1610 * Cleanup the snapshot for the last streamed run.
1611 */
1612 if (txn->snapshot_now != NULL)
1613 {
1616 }
1617
1618 /*
1619 * Remove TXN from its containing lists.
1620 *
1621 * Note: if txn is known as subxact, we are deleting the TXN from its
1622 * parent's list of known subxacts; this leaves the parent's nsubtxns
1623 * count too high, but we don't care. Otherwise, we are deleting the TXN
1624 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1625 * list of catalog modifying transactions as well.
1626 */
1627 dlist_delete(&txn->node);
1629 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1630
1631 /* now remove reference from buffer */
1632 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1633 Assert(found);
1634
1635 /* remove entries spilled to disk */
1636 if (rbtxn_is_serialized(txn))
1638
1639 /* deallocate */
1641}
1642
1643/*
1644 * Discard changes from a transaction (and subtransactions), either after
1645 * streaming, decoding them at PREPARE, or detecting the transaction abort.
1646 * Keep the remaining info - transactions, tuplecids, invalidations and
1647 * snapshots.
1648 *
1649 * We additionally remove tuplecids after decoding the transaction at prepare
1650 * time as we only need to perform invalidation at rollback or commit prepared.
1651 *
1652 * 'txn_prepared' indicates that we have decoded the transaction at prepare
1653 * time.
1654 */
1655static void
1657{
1658 dlist_mutable_iter iter;
1659 Size mem_freed = 0;
1660
1661 /* cleanup subtransactions & their changes */
1662 dlist_foreach_modify(iter, &txn->subtxns)
1663 {
1665
1667
1668 /*
1669 * Subtransactions are always associated to the toplevel TXN, even if
1670 * they originally were happening inside another subtxn, so we won't
1671 * ever recurse more than one level deep here.
1672 */
1674 Assert(subtxn->nsubtxns == 0);
1675
1678 }
1679
1680 /* cleanup changes in the txn */
1681 dlist_foreach_modify(iter, &txn->changes)
1682 {
1683 ReorderBufferChange *change;
1684
1685 change = dlist_container(ReorderBufferChange, node, iter.cur);
1686
1687 /* Check we're not mixing changes from different transactions. */
1688 Assert(change->txn == txn);
1689
1690 /* remove the change from its containing list */
1691 dlist_delete(&change->node);
1692
1693 /*
1694 * Instead of updating the memory counter for individual changes, we
1695 * sum up the size of memory to free so we can update the memory
1696 * counter all together below. This saves costs of maintaining the
1697 * max-heap.
1698 */
1700
1701 ReorderBufferFreeChange(rb, change, false);
1702 }
1703
1704 /* Update the memory counter */
1706
1707 if (txn_prepared)
1708 {
1709 /*
1710 * If this is a prepared txn, cleanup the tuplecids we stored for
1711 * decoding catalog snapshot access. They are always stored in the
1712 * toplevel transaction.
1713 */
1714 dlist_foreach_modify(iter, &txn->tuplecids)
1715 {
1716 ReorderBufferChange *change;
1717
1718 change = dlist_container(ReorderBufferChange, node, iter.cur);
1719
1720 /* Check we're not mixing changes from different transactions. */
1721 Assert(change->txn == txn);
1723
1724 /* Remove the change from its containing list. */
1725 dlist_delete(&change->node);
1726
1727 ReorderBufferFreeChange(rb, change, true);
1728 }
1729 }
1730
1731 /*
1732 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1733 * memory. We could also keep the hash table and update it with new ctid
1734 * values, but this seems simpler and good enough for now.
1735 */
1736 if (txn->tuplecid_hash != NULL)
1737 {
1739 txn->tuplecid_hash = NULL;
1740 }
1741
1742 /* If this txn is serialized then clean the disk space. */
1743 if (rbtxn_is_serialized(txn))
1744 {
1747
1748 /*
1749 * We set this flag to indicate if the transaction is ever serialized.
1750 * We need this to accurately update the stats as otherwise the same
1751 * transaction can be counted as serialized multiple times.
1752 */
1754 }
1755
1756 /* also reset the number of entries in the transaction */
1757 txn->nentries_mem = 0;
1758 txn->nentries = 0;
1759}
1760
1761/*
1762 * Check the transaction status by CLOG lookup and discard all changes if
1763 * the transaction is aborted. The transaction status is cached in
1764 * txn->txn_flags so we can skip future changes and avoid CLOG lookups on the
1765 * next call.
1766 *
1767 * Return true if the transaction is aborted, otherwise return false.
1768 *
1769 * When 'debug_logical_replication_streaming' is set to "immediate", we
1770 * don't check the transaction status, meaning the caller will always process
1771 * this transaction.
1772 */
1773static bool
1775{
1776 /* Quick return for regression tests */
1778 return false;
1779
1780 /*
1781 * Quick return if the transaction status is already known.
1782 */
1783
1784 if (rbtxn_is_committed(txn))
1785 return false;
1786 if (rbtxn_is_aborted(txn))
1787 {
1788 /* Already-aborted transactions should not have any changes */
1789 Assert(txn->size == 0);
1790
1791 return true;
1792 }
1793
1794 /* Otherwise, check the transaction status using CLOG lookup */
1795
1797 return false;
1798
1799 if (TransactionIdDidCommit(txn->xid))
1800 {
1801 /*
1802 * Remember the transaction is committed so that we can skip CLOG
1803 * check next time, avoiding the pressure on CLOG lookup.
1804 */
1805 Assert(!rbtxn_is_aborted(txn));
1807 return false;
1808 }
1809
1810 /*
1811 * The transaction aborted. We discard both the changes collected so far
1812 * and the toast reconstruction data. The full cleanup will happen as part
1813 * of decoding ABORT record of this transaction.
1814 */
1817
1818 /* All changes should be discarded */
1819 Assert(txn->size == 0);
1820
1821 /*
1822 * Mark the transaction as aborted so we can ignore future changes of this
1823 * transaction.
1824 */
1827
1828 return true;
1829}
1830
1831/*
1832 * Build a hash with a (relfilelocator, ctid) -> (cmin, cmax) mapping for use by
1833 * HeapTupleSatisfiesHistoricMVCC.
1834 */
1835static void
1837{
1838 dlist_iter iter;
1840
1842 return;
1843
1845 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1846 hash_ctl.hcxt = rb->context;
1847
1848 /*
1849 * create the hash with the exact number of to-be-stored tuplecids from
1850 * the start
1851 */
1852 txn->tuplecid_hash =
1853 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1855
1856 dlist_foreach(iter, &txn->tuplecids)
1857 {
1860 bool found;
1861 ReorderBufferChange *change;
1862
1863 change = dlist_container(ReorderBufferChange, node, iter.cur);
1864
1866
1867 /* be careful about padding */
1868 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1869
1870 key.rlocator = change->data.tuplecid.locator;
1871
1873 &key.tid);
1874
1876 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1877 if (!found)
1878 {
1879 ent->cmin = change->data.tuplecid.cmin;
1880 ent->cmax = change->data.tuplecid.cmax;
1881 ent->combocid = change->data.tuplecid.combocid;
1882 }
1883 else
1884 {
1885 /*
1886 * Maybe we already saw this tuple before in this transaction, but
1887 * if so it must have the same cmin.
1888 */
1889 Assert(ent->cmin == change->data.tuplecid.cmin);
1890
1891 /*
1892 * cmax may be initially invalid, but once set it can only grow,
1893 * and never become invalid again.
1894 */
1895 Assert((ent->cmax == InvalidCommandId) ||
1896 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1897 (change->data.tuplecid.cmax > ent->cmax)));
1898 ent->cmax = change->data.tuplecid.cmax;
1899 }
1900 }
1901}
1902
1903/*
1904 * Copy a provided snapshot so we can modify it privately. This is needed so
1905 * that catalog modifying transactions can look into intermediate catalog
1906 * states.
1907 */
1908static Snapshot
1911{
1912 Snapshot snap;
1913 dlist_iter iter;
1914 int i = 0;
1915 Size size;
1916
1917 size = sizeof(SnapshotData) +
1918 sizeof(TransactionId) * orig_snap->xcnt +
1919 sizeof(TransactionId) * (txn->nsubtxns + 1);
1920
1921 snap = MemoryContextAllocZero(rb->context, size);
1922 memcpy(snap, orig_snap, sizeof(SnapshotData));
1923
1924 snap->copied = true;
1925 snap->active_count = 1; /* mark as active so nobody frees it */
1926 snap->regd_count = 0;
1927 snap->xip = (TransactionId *) (snap + 1);
1928
1929 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1930
1931 /*
1932 * snap->subxip contains all txids that belong to our transaction which we
1933 * need to check via cmin/cmax. That's why we store the toplevel
1934 * transaction in there as well.
1935 */
1936 snap->subxip = snap->xip + snap->xcnt;
1937 snap->subxip[i++] = txn->xid;
1938
1939 /*
1940 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1941 * manually. Since it's an upper boundary it is safe to use it for the
1942 * allocation above.
1943 */
1944 snap->subxcnt = 1;
1945
1946 dlist_foreach(iter, &txn->subtxns)
1947 {
1949
1951 snap->subxip[i++] = sub_txn->xid;
1952 snap->subxcnt++;
1953 }
1954
1955 /* sort so we can bsearch() later */
1956 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1957
1958 /* store the specified current CommandId */
1959 snap->curcid = cid;
1960
1961 return snap;
1962}
1963
1964/*
1965 * Free a previously ReorderBufferCopySnap'ed snapshot
1966 */
1967static void
1969{
1970 if (snap->copied)
1971 pfree(snap);
1972 else
1974}
1975
1976/*
1977 * If the transaction was (partially) streamed, we need to prepare or commit
1978 * it in a 'streamed' way. That is, we first stream the remaining part of the
1979 * transaction, and then invoke the stream_prepare or stream_commit callback,
1980 * as appropriate.
1981 */
1982static void
1984{
1985 /* we should only call this for previously streamed transactions */
1987
1989
1990 if (rbtxn_is_prepared(txn))
1991 {
1992 /*
1993 * Note, we send stream prepare even if a concurrent abort is
1994 * detected. See DecodePrepare for more information.
1995 */
1997 rb->stream_prepare(rb, txn, txn->final_lsn);
1999
2000 /*
2001 * This is a PREPARED transaction, part of a two-phase commit. The
2002 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2003 * just truncate txn by removing changes and tuplecids.
2004 */
2005 ReorderBufferTruncateTXN(rb, txn, true);
2006 /* Reset the CheckXidAlive */
2008 }
2009 else
2010 {
2011 rb->stream_commit(rb, txn, txn->final_lsn);
2013 }
2014}
2015
2016/*
2017 * Set xid to detect concurrent aborts.
2018 *
2019 * While streaming an in-progress transaction or decoding a prepared
2020 * transaction there is a possibility that the (sub)transaction might get
2021 * aborted concurrently. In such a case, if the (sub)transaction has a catalog
2022 * update then we might decode the tuple using the wrong catalog version. For
2023 * example, suppose there is one catalog tuple with (xmin: 500, xmax: 0). Now,
2024 * the transaction 501 updates the catalog tuple and after that we will have
2025 * two tuples (xmin: 500, xmax: 501) and (xmin: 501, xmax: 0). Now, if 501 is
2026 * aborted and some other transaction say 502 updates the same catalog tuple
2027 * then the first tuple will be changed to (xmin: 500, xmax: 502). So, the
2028 * problem is that when we try to decode the tuple inserted/updated in 501
2029 * after the catalog update, we will see the catalog tuple with (xmin: 500,
2030 * xmax: 502) as visible because it will consider that the tuple is deleted by
2031 * xid 502 which is not visible to our snapshot. And when we try to
2032 * decode with that catalog tuple, it can lead to a wrong result or a crash.
2033 * So, it is necessary to detect concurrent aborts to allow streaming of
2034 * in-progress transactions or decoding of prepared transactions.
2035 *
2036 * For detecting the concurrent abort we set CheckXidAlive to the current
2037 * (sub)transaction's xid to which this change belongs. And, during
2038 * catalog scan we can check the status of the xid and if it is aborted we will
2039 * report a specific error so that we can stop streaming current transaction
2040 * and discard the already streamed changes on such an error. We might have
2041 * already streamed some of the changes for the aborted (sub)transaction, but
2042 * that is fine because when we decode the abort we will stream abort message
2043 * to truncate the changes in the subscriber. Similarly, for prepared
2044 * transactions, we stop decoding if concurrent abort is detected and then
2045 * rollback the changes when rollback prepared is encountered. See
2046 * DecodePrepare.
2047 */
2048static inline void
2050{
2051 /*
2052 * If the input transaction id is already set as a CheckXidAlive then
2053 * nothing to do.
2054 */
2056 return;
2057
2058 /*
2059 * setup CheckXidAlive if it's not committed yet. We don't check if the
2060 * xid is aborted. That will happen during catalog access.
2061 */
2062 if (!TransactionIdDidCommit(xid))
2063 CheckXidAlive = xid;
2064 else
2066}
2067
2068/*
2069 * Helper function for ReorderBufferProcessTXN for applying change.
2070 */
2071static inline void
2073 Relation relation, ReorderBufferChange *change,
2074 bool streaming)
2075{
2076 if (streaming)
2077 rb->stream_change(rb, txn, relation, change);
2078 else
2079 rb->apply_change(rb, txn, relation, change);
2080}
2081
2082/*
2083 * Helper function for ReorderBufferProcessTXN for applying the truncate.
2084 */
2085static inline void
2087 int nrelations, Relation *relations,
2088 ReorderBufferChange *change, bool streaming)
2089{
2090 if (streaming)
2091 rb->stream_truncate(rb, txn, nrelations, relations, change);
2092 else
2093 rb->apply_truncate(rb, txn, nrelations, relations, change);
2094}
2095
2096/*
2097 * Helper function for ReorderBufferProcessTXN for applying the message.
2098 */
2099static inline void
2101 ReorderBufferChange *change, bool streaming)
2102{
2103 if (streaming)
2104 rb->stream_message(rb, txn, change->lsn, true,
2105 change->data.msg.prefix,
2106 change->data.msg.message_size,
2107 change->data.msg.message);
2108 else
2109 rb->message(rb, txn, change->lsn, true,
2110 change->data.msg.prefix,
2111 change->data.msg.message_size,
2112 change->data.msg.message);
2113}
2114
2115/*
2116 * Function to store the command id and snapshot at the end of the current
2117 * stream so that we can reuse the same while sending the next stream.
2118 */
2119static inline void
2121 Snapshot snapshot_now, CommandId command_id)
2122{
2123 txn->command_id = command_id;
2124
2125 /* Avoid copying if it's already copied. */
2126 if (snapshot_now->copied)
2127 txn->snapshot_now = snapshot_now;
2128 else
2129 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2130 txn, command_id);
2131}
2132
2133/*
2134 * Mark the given transaction as streamed if it's a top-level transaction
2135 * or has changes.
2136 */
2137static void
2139{
2140 /*
2141 * The top-level transaction is always marked as streamed, even if it
2142 * does not contain any changes (that is, when all the changes are in
2143 * subtransactions).
2144 *
2145 * For subtransactions, we only mark them as streamed when there are
2146 * changes in them.
2147 *
2148 * We do it this way because of aborts - we don't want to send aborts for
2149 * XIDs the downstream is not aware of. And of course, it always knows
2150 * about the top-level xact (we send the XID in all messages), but we
2151 * never stream XIDs of empty subxacts.
2152 */
2153 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2155}
2156
2157/*
2158 * Helper function for ReorderBufferProcessTXN to handle the concurrent
2159 * abort of the streaming transaction. This resets the TXN such that it
2160 * can be used to stream the remaining data of transaction being processed.
2161 * This can happen when the subtransaction is aborted and we still want to
2162 * continue processing the main or other subtransactions' data.
2163 */
2164static void
2166 Snapshot snapshot_now,
2167 CommandId command_id,
2168 XLogRecPtr last_lsn,
2170{
2171 /* Discard the changes that we just streamed */
2173
2174 /* Free all resources allocated for toast reconstruction */
2176
2177 /* Return the spec insert change if it is not NULL */
2178 if (specinsert != NULL)
2179 {
2181 specinsert = NULL;
2182 }
2183
2184 /*
2185 * For the streaming case, stop the stream and remember the command ID and
2186 * snapshot for the streaming run.
2187 */
2188 if (rbtxn_is_streamed(txn))
2189 {
2190 rb->stream_stop(rb, txn, last_lsn);
2191 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2192 }
2193
2194 /* All changes must be deallocated */
2195 Assert(txn->size == 0);
2196}
2197
2198/*
2199 * Helper function for ReorderBufferReplay and ReorderBufferStreamTXN.
2200 *
2201 * Send data of a transaction (and its subtransactions) to the
2202 * output plugin. We iterate over the top and subtransactions (using a k-way
2203 * merge) and replay the changes in lsn order.
2204 *
2205 * If streaming is true then data will be sent using stream API.
2206 *
2207 * Note: "volatile" markers on some parameters are to avoid trouble with
2208 * PG_TRY inside the function.
2209 */
2210static void
2212 XLogRecPtr commit_lsn,
2213 volatile Snapshot snapshot_now,
2214 volatile CommandId command_id,
2215 bool streaming)
2216{
2217 bool using_subtxn;
2223 volatile bool stream_started = false;
2224 ReorderBufferTXN *volatile curtxn = NULL;
2225
2226 /* build data to be able to lookup the CommandIds of catalog tuples */
2228
2229 /* setup the initial snapshot */
2230 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2231
2232 /*
2233 * Decoding needs access to syscaches et al., which in turn use
2234 * heavyweight locks and such. Thus we need to have enough state around to
2235 * keep track of those. The easiest way is to simply use a transaction
2236 * internally. That also allows us to easily enforce that nothing writes
2237 * to the database by checking for xid assignments.
2238 *
2239 * When we're called via the SQL SRF there's already a transaction
2240 * started, so start an explicit subtransaction there.
2241 */
2243
2244 PG_TRY();
2245 {
2246 ReorderBufferChange *change;
2247 int changes_count = 0; /* used to accumulate the number of
2248 * changes */
2249
2250 if (using_subtxn)
2251 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2252 else
2254
2255 /*
2256 * We only need to send begin/begin-prepare for non-streamed
2257 * transactions.
2258 */
2259 if (!streaming)
2260 {
2261 if (rbtxn_is_prepared(txn))
2262 rb->begin_prepare(rb, txn);
2263 else
2264 rb->begin(rb, txn);
2265 }
2266
2268 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2269 {
2270 Relation relation = NULL;
2271 Oid reloid;
2272
2274
2275 /*
2276 * We can't call start stream callback before processing first
2277 * change.
2278 */
2280 {
2281 if (streaming)
2282 {
2283 txn->origin_id = change->origin_id;
2284 rb->stream_start(rb, txn, change->lsn);
2285 stream_started = true;
2286 }
2287 }
2288
2289 /*
2290 * Enforce correct ordering of changes, merged from multiple
2291 * subtransactions. The changes may have the same LSN due to
2292 * MULTI_INSERT xlog records.
2293 */
2295
2296 prev_lsn = change->lsn;
2297
2298 /*
2299 * Set the current xid to detect concurrent aborts. This is
2300 * required for the cases when we decode the changes before the
2301 * COMMIT record is processed.
2302 */
2303 if (streaming || rbtxn_is_prepared(change->txn))
2304 {
2305 curtxn = change->txn;
2307 }
2308
2309 switch (change->action)
2310 {
2312
2313 /*
2314 * Confirmation for speculative insertion arrived. Simply
2315 * use as a normal record. It'll be cleaned up at the end
2316 * of INSERT processing.
2317 */
2318 if (specinsert == NULL)
2319 elog(ERROR, "invalid ordering of speculative insertion changes");
2320 Assert(specinsert->data.tp.oldtuple == NULL);
2321 change = specinsert;
2323
2324 /* intentionally fall through */
2329 Assert(snapshot_now);
2330
2331 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2332 change->data.tp.rlocator.relNumber);
2333
2334 /*
2335 * Mapped catalog tuple without data, emitted while
2336 * catalog table was in the process of being rewritten. We
2337 * can fail to look up the relfilenumber, because the
2338 * relmapper has no "historic" view, in contrast to the
2339 * normal catalog during decoding. Thus repeated rewrites
2340 * can cause a lookup failure. That's OK because we do not
2341 * decode catalog changes anyway. Normally such tuples
2342 * would be skipped over below, but we can't identify
2343 * whether the table should be logically logged without
2344 * mapping the relfilenumber to the oid.
2345 */
2346 if (reloid == InvalidOid &&
2347 change->data.tp.newtuple == NULL &&
2348 change->data.tp.oldtuple == NULL)
2349 goto change_done;
2350 else if (reloid == InvalidOid)
2351 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2352 relpathperm(change->data.tp.rlocator,
2353 MAIN_FORKNUM).str);
2354
2355 relation = RelationIdGetRelation(reloid);
2356
2357 if (!RelationIsValid(relation))
2358 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2359 reloid,
2360 relpathperm(change->data.tp.rlocator,
2361 MAIN_FORKNUM).str);
2362
2363 if (!RelationIsLogicallyLogged(relation))
2364 goto change_done;
2365
2366 /*
2367 * Ignore temporary heaps created during DDL unless the
2368 * plugin has asked for them.
2369 */
2370 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2371 goto change_done;
2372
2373 /*
2374 * For now ignore sequence changes entirely. Most of the
2375 * time they don't log changes using records we
2376 * understand, so it doesn't make sense to handle the few
2377 * cases we do.
2378 */
2379 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2380 goto change_done;
2381
2382 /* user-triggered change */
2383 if (!IsToastRelation(relation))
2384 {
2385 ReorderBufferToastReplace(rb, txn, relation, change);
2386 ReorderBufferApplyChange(rb, txn, relation, change,
2387 streaming);
2388
2389 /*
2390 * Only clear reassembled toast chunks if we're sure
2391 * they're not required anymore. The creator of the
2392 * tuple tells us.
2393 */
2394 if (change->data.tp.clear_toast_afterwards)
2396 }
2397 /* we're not interested in toast deletions */
2398 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2399 {
2400 /*
2401 * Need to reassemble the full toasted Datum in
2402 * memory. To ensure the chunks don't get reused till
2403 * we're done, remove the change from the list of this
2404 * transaction's changes; otherwise it will get
2405 * freed/reused while restoring spooled data from
2406 * disk.
2407 */
2408 Assert(change->data.tp.newtuple != NULL);
2409
2410 dlist_delete(&change->node);
2411 ReorderBufferToastAppendChunk(rb, txn, relation,
2412 change);
2413 }
2414
2416
2417 /*
2418 * If speculative insertion was confirmed, the record
2419 * isn't needed anymore.
2420 */
2421 if (specinsert != NULL)
2422 {
2424 specinsert = NULL;
2425 }
2426
2427 if (RelationIsValid(relation))
2428 {
2429 RelationClose(relation);
2430 relation = NULL;
2431 }
2432 break;
2433
2435
2436 /*
2437 * Speculative insertions are dealt with by delaying the
2438 * processing of the insert until the confirmation record
2439 * arrives. For that we simply unlink the record from the
2440 * chain, so it does not get freed/reused while restoring
2441 * spooled data from disk.
2442 *
2443 * This is safe in the face of concurrent catalog changes
2444 * because the relevant relation can't be changed between
2445 * speculative insertion and confirmation due to
2446 * CheckTableNotInUse() and locking.
2447 */
2448
2449 /* Previous speculative insertion must be aborted */
2451
2452 /* and memorize the pending insertion */
2453 dlist_delete(&change->node);
2454 specinsert = change;
2455 break;
2456
2458
2459 /*
2460 * Abort for speculative insertion arrived. So cleanup the
2461 * specinsert tuple and toast hash.
2462 *
2463 * Note that we get the spec abort change for each toast
2464 * entry but we need to perform the cleanup only the first
2465 * time we get it for the main table.
2466 */
2467 if (specinsert != NULL)
2468 {
2469 /*
2470 * We must clean the toast hash before processing a
2471 * completely new tuple to avoid confusion about the
2472 * previous tuple's toast chunks.
2473 */
2476
2477 /* We don't need this record anymore. */
2479 specinsert = NULL;
2480 }
2481 break;
2482
2484 {
2485 int i;
2486 int nrelids = change->data.truncate.nrelids;
2487 int nrelations = 0;
2488 Relation *relations;
2489
2490 relations = palloc0_array(Relation, nrelids);
2491 for (i = 0; i < nrelids; i++)
2492 {
2493 Oid relid = change->data.truncate.relids[i];
2494 Relation rel;
2495
2496 rel = RelationIdGetRelation(relid);
2497
2498 if (!RelationIsValid(rel))
2499 elog(ERROR, "could not open relation with OID %u", relid);
2500
2501 if (!RelationIsLogicallyLogged(rel))
2502 continue;
2503
2504 relations[nrelations++] = rel;
2505 }
2506
2507 /* Apply the truncate. */
2509 relations, change,
2510 streaming);
2511
2512 for (i = 0; i < nrelations; i++)
2513 RelationClose(relations[i]);
2514
2515 break;
2516 }
2517
2519 ReorderBufferApplyMessage(rb, txn, change, streaming);
2520 break;
2521
2523 /* Execute the invalidation messages locally */
2525 change->data.inval.invalidations);
2526 break;
2527
2529 /* get rid of the old */
2531
2532 if (snapshot_now->copied)
2533 {
2534 ReorderBufferFreeSnap(rb, snapshot_now);
2535 snapshot_now =
2537 txn, command_id);
2538 }
2539
2540 /*
2541 * Restored from disk, need to be careful not to double
2542 * free. We could introduce refcounting for that, but for
2543 * now this seems infrequent enough not to care.
2544 */
2545 else if (change->data.snapshot->copied)
2546 {
2547 snapshot_now =
2549 txn, command_id);
2550 }
2551 else
2552 {
2553 snapshot_now = change->data.snapshot;
2554 }
2555
2556 /* and continue with the new one */
2557 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2558 break;
2559
2562
2563 if (command_id < change->data.command_id)
2564 {
2565 command_id = change->data.command_id;
2566
2567 if (!snapshot_now->copied)
2568 {
2569 /* we don't use the global one anymore */
2570 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2571 txn, command_id);
2572 }
2573
2574 snapshot_now->curcid = command_id;
2575
2577 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2578 }
2579
2580 break;
2581
2583 elog(ERROR, "tuplecid value in changequeue");
2584 break;
2585 }
2586
2587 /*
2588 * It is possible that the data is not sent to downstream for a
2589 * long time either because the output plugin filtered it or there
2590 * is a DDL that generates a lot of data that is not processed by
2591 * the plugin. So, in such cases, the downstream can timeout. To
2592 * avoid that we try to send a keepalive message if required.
2593 * Trying to send a keepalive message after every change has some
2594 * overhead, but testing showed there is no noticeable overhead if
2595 * we do it after every ~100 changes.
2596 */
2597#define CHANGES_THRESHOLD 100
2598
2600 {
2601 rb->update_progress_txn(rb, txn, prev_lsn);
2602 changes_count = 0;
2603 }
2604 }
2605
2606 /* speculative insertion record must be freed by now */
2608
2609 /* clean up the iterator */
2611 iterstate = NULL;
2612
2613 /*
2614 * Update total transaction count and total bytes processed by the
2615 * transaction and its subtransactions. Ensure to not count the
2616 * streamed transaction multiple times.
2617 *
2618 * Note that the statistics computation has to be done after
2619 * ReorderBufferIterTXNFinish as it releases the serialized change
2620 * which we have already accounted in ReorderBufferIterTXNNext.
2621 */
2622 if (!rbtxn_is_streamed(txn))
2623 rb->totalTxns++;
2624
2625 rb->totalBytes += txn->total_size;
2626
2627 /*
2628 * Done with current changes, send the last message for this set of
2629 * changes depending upon streaming mode.
2630 */
2631 if (streaming)
2632 {
2633 if (stream_started)
2634 {
2635 rb->stream_stop(rb, txn, prev_lsn);
2636 stream_started = false;
2637 }
2638 }
2639 else
2640 {
2641 /*
2642 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2643 * regular ones).
2644 */
2645 if (rbtxn_is_prepared(txn))
2646 {
2648 rb->prepare(rb, txn, commit_lsn);
2650 }
2651 else
2652 rb->commit(rb, txn, commit_lsn);
2653 }
2654
2655 /* this is just a sanity check against bad output plugin behaviour */
2657 elog(ERROR, "output plugin used XID %u",
2659
2660 /*
2661 * Remember the command ID and snapshot for the next set of changes in
2662 * streaming mode.
2663 */
2664 if (streaming)
2665 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2666 else if (snapshot_now->copied)
2667 ReorderBufferFreeSnap(rb, snapshot_now);
2668
2669 /* cleanup */
2671
2672 /*
2673 * Aborting the current (sub-)transaction as a whole has the right
2674 * semantics. We want all locks acquired in here to be released, not
2675 * reassigned to the parent and we do not want any database access
2676 * to have persistent effects.
2677 */
2679
2680 /* make sure there's no cache pollution */
2682 {
2685 }
2686 else
2687 {
2691 }
2692
2693 if (using_subtxn)
2694 {
2697 CurrentResourceOwner = cowner;
2698 }
2699
2700 /*
2701 * We are here due to one of the four reasons: 1. Decoding an
2702 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2703 * prepared txn that was (partially) streamed. 4. Decoding a committed
2704 * txn.
2705 *
2706 * For 1, we allow truncation of txn data by removing the changes
2707 * already streamed but still keeping other things like invalidations,
2708 * snapshot, and tuplecids. For 2 and 3, we indicate
2709 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2710 * data as the entire transaction has been decoded except for commit.
2711 * For 4, as the entire txn has been decoded, we can fully clean up
2712 * the TXN reorder buffer.
2713 */
2714 if (streaming || rbtxn_is_prepared(txn))
2715 {
2716 if (streaming)
2718
2720 /* Reset the CheckXidAlive */
2722 }
2723 else
2725 }
2726 PG_CATCH();
2727 {
2730
2731 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2732 if (iterstate)
2734
2736
2737 /*
2738 * Force cache invalidation to happen outside of a valid transaction
2739 * to prevent catalog access as we just caught an error.
2740 */
2742
2743 /* make sure there's no cache pollution */
2745 {
2748 }
2749 else
2750 {
2754 }
2755
2756 if (using_subtxn)
2757 {
2760 CurrentResourceOwner = cowner;
2761 }
2762
2763 /*
2764 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2765 * abort of the (sub)transaction we are streaming or preparing. We
2766 * need to do the cleanup and return gracefully on this error, see
2767 * SetupCheckXidLive.
2768 *
2769 * This error code can be thrown by one of the callbacks we call
2770 * during decoding so we need to ensure that we return gracefully only
2771 * when we are sending the data in streaming mode and the streaming is
2772 * not finished yet or when we are sending the data out on a PREPARE
2773 * during a two-phase commit.
2774 */
2775 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2777 {
2778 /* curtxn must be set for streaming or prepared transactions */
2779 Assert(curtxn);
2780
2781 /* Cleanup the temporary error state. */
2784 errdata = NULL;
2785
2786 /* Remember the transaction is aborted. */
2788 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2789
2790 /* Mark the transaction as streamed if appropriate */
2791 if (stream_started)
2793
2794 /* Reset the TXN so that it is allowed to stream remaining data. */
2795 ReorderBufferResetTXN(rb, txn, snapshot_now,
2796 command_id, prev_lsn,
2797 specinsert);
2798 }
2799 else
2800 {
2803 PG_RE_THROW();
2804 }
2805 }
2806 PG_END_TRY();
2807}
2808
2809/*
2810 * Perform the replay of a transaction and its non-aborted subtransactions.
2811 *
2812 * Subtransactions have to be processed beforehand by
2813 * ReorderBufferCommitChild(), even if previously assigned to the toplevel
2814 * transaction with ReorderBufferAssignChild().
2815 *
2816 * This interface is called once a prepare or toplevel commit is read for both
2817 * streamed as well as non-streamed transactions.
2818 */
2819static void
2822 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2823 TimestampTz commit_time,
2824 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2825{
2826 Snapshot snapshot_now;
2827 CommandId command_id = FirstCommandId;
2828
2829 txn->final_lsn = commit_lsn;
2830 txn->end_lsn = end_lsn;
2831 txn->commit_time = commit_time;
2832 txn->origin_id = origin_id;
2833 txn->origin_lsn = origin_lsn;
2834
2835 /*
2836 * If the transaction was (partially) streamed, we need to commit it in a
2837 * 'streamed' way. That is, we first stream the remaining part of the
2838 * transaction, and then invoke stream_commit message.
2839 *
2840 * Called after everything (origin ID, LSN, ...) is stored in the
2841 * transaction to avoid passing that information directly.
2842 */
2843 if (rbtxn_is_streamed(txn))
2844 {
2846 return;
2847 }
2848
2849 /*
2850 * If this transaction has no snapshot, it didn't make any changes to the
2851 * database, so there's nothing to decode. Note that
2852 * ReorderBufferCommitChild will have transferred any snapshots from
2853 * subtransactions if there were any.
2854 */
2855 if (txn->base_snapshot == NULL)
2856 {
2857 Assert(txn->ninvalidations == 0);
2858
2859 /*
2860 * Removing this txn before a commit might result in the computation
2861 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2862 */
2863 if (!rbtxn_is_prepared(txn))
2865 return;
2866 }
2867
2868 snapshot_now = txn->base_snapshot;
2869
2870 /* Process and send the changes to output plugin. */
2871 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2872 command_id, false);
2873}
2874
2875/*
2876 * Commit a transaction.
2877 *
2878 * See comments for ReorderBufferReplay().
 *
 * The transaction is looked up by xid. If the lookup returns NULL the
 * transaction is unknown to the reorder buffer and the function returns
 * without replaying anything. Otherwise commit_lsn, end_lsn, commit_time,
 * origin_id and origin_lsn are handed on unchanged to ReorderBufferReplay().
2879 */
2880void
2882 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2883 TimestampTz commit_time,
2884 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2885{
2886 ReorderBufferTXN *txn;
2887
 /*
  * NOTE(review): the two "false" arguments presumably request a pure
  * lookup (no creation of a new entry) -- confirm against
  * ReorderBufferTXNByXid().
  */
2888 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2889 false);
2890
2891 /* unknown transaction, nothing to replay */
2892 if (txn == NULL)
2893 return;
2894
2895 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2896 origin_id, origin_lsn);
2897}
2898
2899/*
2900 * Record the prepare information for a transaction. Also, mark the transaction
2901 * as a prepared transaction.
2902 */
2903bool
2905 XLogRecPtr prepare_lsn, XLogRecPtr end_lsn,
2906 TimestampTz prepare_time,
2907 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2908{
2909 ReorderBufferTXN *txn;
2910
2911 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2912
2913 /* unknown transaction, nothing to do */
2914 if (txn == NULL)
2915 return false;
2916
2917 /*
2918 * Remember the prepare information to be later used by commit prepared in
2919 * case we skip doing prepare.
2920 */
2921 txn->final_lsn = prepare_lsn;
2922 txn->end_lsn = end_lsn;
2923 txn->prepare_time = prepare_time;
2924 txn->origin_id = origin_id;
2925 txn->origin_lsn = origin_lsn;
2926
2927 /* Mark this transaction as a prepared transaction */
2930
2931 return true;
2932}
2933
2934/* Remember that we have skipped prepare */
2935void
2937{
2938 ReorderBufferTXN *txn;
2939
2940 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2941
2942 /* unknown transaction, nothing to do */
2943 if (txn == NULL)
2944 return;
2945
2946 /* txn must have been marked as a prepared transaction */
2949}
2950
2951/*
2952 * Prepare a two-phase transaction.
2953 *
2954 * See comments for ReorderBufferReplay().
2955 */
2956void
2958 char *gid)
2959{
2960 ReorderBufferTXN *txn;
2961
2962 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2963 false);
2964
2965 /* unknown transaction, nothing to replay */
2966 if (txn == NULL)
2967 return;
2968
2969 /*
2970 * txn must have been marked as a prepared transaction and must have
2971 * neither been skipped nor sent a prepare. Also, the prepare info must
2972 * have been updated in it by now.
2973 */
2976
2977 txn->gid = pstrdup(gid);
2978
2979 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2980 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2981
2982 /*
2983 * Send a prepare if not already done so. This might occur if we have
2984 * detected a concurrent abort while replaying the non-streaming
2985 * transaction.
2986 */
2987 if (!rbtxn_sent_prepare(txn))
2988 {
2989 rb->prepare(rb, txn, txn->final_lsn);
2991 }
2992}
2993
2994/*
2995 * This is used to handle COMMIT/ROLLBACK PREPARED.
2996 */
2997void
2999 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
3000 XLogRecPtr two_phase_at,
3001 TimestampTz commit_time, ReplOriginId origin_id,
3002 XLogRecPtr origin_lsn, char *gid, bool is_commit)
3003{
3004 ReorderBufferTXN *txn;
3005 XLogRecPtr prepare_end_lsn;
3006 TimestampTz prepare_time;
3007
3008 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3009
3010 /* unknown transaction, nothing to do */
3011 if (txn == NULL)
3012 return;
3013
3014 /*
3015 * By this time the txn has the prepare record information, remember it to
3016 * be later used for rollback.
3017 */
3018 prepare_end_lsn = txn->end_lsn;
3019 prepare_time = txn->prepare_time;
3020
3021 /* add the gid in the txn */
3022 txn->gid = pstrdup(gid);
3023
3024 /*
3025 * It is possible that this transaction is not decoded at prepare time
3026 * either because by that time we didn't have a consistent snapshot, or
3027 * two_phase was not enabled, or it was decoded earlier but we have
3028 * restarted. We only need to send the prepare if it was not decoded
3029 * earlier. We don't need to decode the xact for aborts if it is not done
3030 * already.
3031 */
3032 if ((txn->final_lsn < two_phase_at) && is_commit)
3033 {
3034 /*
3035 * txn must have been marked as a prepared transaction and skipped but
3036 * not sent a prepare. Also, the prepare info must have been updated
3037 * in txn even if we skip prepare.
3038 */
3042
3043 /*
3044 * By this time the txn has the prepare record information and it is
3045 * important to use that so that downstream gets the accurate
3046 * information. If instead, we have passed commit information here
3047 * then downstream can behave as it has already replayed commit
3048 * prepared after the restart.
3049 */
3050 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3051 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3052 }
3053
3054 txn->final_lsn = commit_lsn;
3055 txn->end_lsn = end_lsn;
3056 txn->commit_time = commit_time;
3057 txn->origin_id = origin_id;
3058 txn->origin_lsn = origin_lsn;
3059
3060 if (is_commit)
3061 rb->commit_prepared(rb, txn, commit_lsn);
3062 else
3063 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3064
3065 /* cleanup: make sure there's no cache pollution */
3067 txn->invalidations);
3069}
3070
3071/*
3072 * Abort a transaction that possibly has previous changes. Needs to be first
3073 * called for subtransactions and then for the toplevel xid.
3074 *
3075 * NB: Transactions handled here have to have actively aborted (i.e. have
3076 * produced an abort record). Implicitly aborted transactions are handled via
3077 * ReorderBufferAbortOld(); transactions we're just not interested in, but
3078 * which have committed are handled in ReorderBufferForget().
3079 *
3080 * This function purges this transaction and its contents from memory and
3081 * disk.
3082 */
3083void
3085 TimestampTz abort_time)
3086{
3087 ReorderBufferTXN *txn;
3088
3089 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3090 false);
3091
3092 /* unknown, nothing to remove */
3093 if (txn == NULL)
3094 return;
3095
3096 txn->abort_time = abort_time;
3097
3098 /* For streamed transactions notify the remote node about the abort. */
3099 if (rbtxn_is_streamed(txn))
3100 {
3101 rb->stream_abort(rb, txn, lsn);
3102
3103 /*
3104 * We might have decoded changes for this transaction that could load
3105 * the cache as per the current transaction's view (consider DDL's
3106 * happened in this transaction). We don't want the decoding of future
3107 * transactions to use those cache entries so execute only the inval
3108 * messages in this transaction.
3109 */
3110 if (txn->ninvalidations > 0)
3112 txn->invalidations);
3113 }
3114
3115 /* cosmetic... */
3116 txn->final_lsn = lsn;
3117
3118 /* remove potential on-disk data, and deallocate */
3120}
3121
3122/*
3123 * Abort all transactions that aren't actually running anymore because the
3124 * server restarted.
3125 *
3126 * NB: These really have to be transactions that have aborted due to a server
3127 * crash/immediate restart, as we don't deal with invalidations here.
3128 */
3129void
3131{
3133
3134 /*
3135 * Iterate through all (potential) toplevel TXNs and abort all that are
3136 * older than what possibly can be running. Once we've found the first
3137 * that is alive we stop, there might be some that acquired an xid earlier
3138 * but started writing later, but it's unlikely and they will be cleaned
3139 * up in a later call to this function.
3140 */
3141 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3142 {
3143 ReorderBufferTXN *txn;
3144
3145 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3146
3147 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3148 {
3149 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3150
3151 /* Notify the remote node about the crash/immediate restart. */
3152 if (rbtxn_is_streamed(txn))
3153 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3154
3155 /* remove potential on-disk data, and deallocate this tx */
3157 }
3158 else
3159 return;
3160 }
3161}
3162
3163/*
3164 * Forget the contents of a transaction if we aren't interested in its
3165 * contents. Needs to be first called for subtransactions and then for the
3166 * toplevel xid.
3167 *
3168 * This is significantly different to ReorderBufferAbort() because
3169 * transactions that have committed need to be treated differently from aborted
3170 * ones since they may have modified the catalog.
3171 *
3172 * Note that this is only allowed to be called in the moment a transaction
3173 * commit has just been read, not earlier; otherwise later records referring
3174 * to this xid might re-create the transaction incompletely.
3175 */
3176void
3178{
3179 ReorderBufferTXN *txn;
3180
3181 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3182 false);
3183
3184 /* unknown, nothing to forget */
3185 if (txn == NULL)
3186 return;
3187
3188 /* this transaction mustn't be streamed */
3190
3191 /* cosmetic... */
3192 txn->final_lsn = lsn;
3193
3194 /*
3195 * Process only cache invalidation messages in this transaction if there
3196 * are any. Even if we're not interested in the transaction's contents, it
3197 * could have manipulated the catalog and we need to update the caches
3198 * according to that.
3199 */
3200 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3202 txn->invalidations);
3203 else
3204 Assert(txn->ninvalidations == 0);
3205
3206 /* remove potential on-disk data, and deallocate */
3208}
3209
3210/*
3211 * Invalidate cache for those transactions that need to be skipped just in case
3212 * catalogs were manipulated as part of the transaction.
3213 *
3214 * Note that this is a special-purpose function for prepared transactions where
3215 * we don't want to clean up the TXN even when we decide to skip it. See
3216 * DecodePrepare.
3217 */
3218void
3220{
3221 ReorderBufferTXN *txn;
3222
3223 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3224 false);
3225
3226 /* unknown, nothing to do */
3227 if (txn == NULL)
3228 return;
3229
3230 /*
3231 * Process cache invalidation messages if there are any. Even if we're not
3232 * interested in the transaction's contents, it could have manipulated the
3233 * catalog and we need to update the caches according to that.
3234 */
3235 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3237 txn->invalidations);
3238 else
3239 Assert(txn->ninvalidations == 0);
3240}
3241
3242
3243/*
3244 * Execute invalidations happening outside the context of a decoded
3245 * transaction. That currently happens either for xid-less commits
3246 * (cf. RecordTransactionCommit()) or for invalidations in uninteresting
3247 * transactions (via ReorderBufferForget()).
3248 */
3249void
3251 SharedInvalidationMessage *invalidations)
3252{
3256 int i;
3257
3258 if (use_subtxn)
3260
3261 /*
3262 * Force invalidations to happen outside of a valid transaction - that way
3263 * entries will just be marked as invalid without accessing the catalog.
3264 * That's advantageous because we don't need to setup the full state
3265 * necessary for catalog access.
3266 */
3267 if (use_subtxn)
3269
3270 for (i = 0; i < ninvalidations; i++)
3271 LocalExecuteInvalidationMessage(&invalidations[i]);
3272
3273 if (use_subtxn)
3274 {
3277 CurrentResourceOwner = cowner;
3278 }
3279}
3280
3281/*
3282 * Tell reorderbuffer about an xid seen in the WAL stream. Has to be called at
3283 * least once for every xid in XLogRecord->xl_xid (other places in records
3284 * may, but do not have to be passed through here).
3285 *
3286 * Reorderbuffer keeps some data structures about transactions in LSN order,
3287 * for efficiency. To do that it has to know about when transactions are seen
3288 * first in the WAL. As many types of records are not actually interesting for
3289 * logical decoding, they do not necessarily pass through here.
 *
 * Calling this with InvalidTransactionId is a no-op. For any other xid the
 * transaction is looked up through ReorderBufferTXNByXid() using the given
 * lsn; the entry returned by that lookup is not used here.
3290 */
3291void
3293{
3294 /* many records won't have an xid assigned, centralize check here */
3295 if (xid != InvalidTransactionId)
3296 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3297}
3298
3299/*
3300 * Add a new snapshot to this transaction that may only be used after lsn 'lsn'
3301 * because the previous snapshot doesn't describe the catalog correctly for
3302 * following rows.
3303 */
3304void
3307{
3309
3310 change->data.snapshot = snap;
3312
3313 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3314}
3315
3316/*
3317 * Set up the transaction's base snapshot.
3318 *
3319 * If we know that xid is a subtransaction, set the base snapshot on the
3320 * top-level transaction instead.
3321 */
3322void
3325{
3326 ReorderBufferTXN *txn;
3327 bool is_new;
3328
3329 Assert(snap != NULL);
3330
3331 /*
3332 * Fetch the transaction to operate on. If we know it's a subtransaction,
3333 * operate on its top-level transaction instead.
3334 */
3335 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3336 if (rbtxn_is_known_subxact(txn))
3337 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3338 NULL, InvalidXLogRecPtr, false);
3339 Assert(txn->base_snapshot == NULL);
3340
3341 txn->base_snapshot = snap;
3342 txn->base_snapshot_lsn = lsn;
3343 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3344
3346}
3347
3348/*
3349 * Access the catalog with this CommandId at this point in the changestream.
3350 *
3351 * May only be called for command ids > 1
3352 */
3353void
3356{
3358
3359 change->data.command_id = cid;
3361
3362 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3363}
3364
3365/*
3366 * Update memory counters to account for the new or removed change.
3367 *
3368 * We update two counters - in the reorder buffer, and in the transaction
3369 * containing the change. The reorder buffer counter allows us to quickly
3370 * decide if we reached the memory limit, the transaction counter allows
3371 * us to quickly pick the largest transaction for eviction.
3372 *
3373 * Either txn or change must be non-NULL at least. We update the memory
3374 * counter of txn if it's non-NULL, otherwise change->txn.
3375 *
3376 * When streaming is enabled, we need to update the toplevel transaction
3377 * counters instead - we don't really care about subtransactions as we
3378 * can't stream them individually anyway, and we only pick toplevel
3379 * transactions for eviction. So only toplevel transactions matter.
 *
 * NOTE(review): the code below does not look at any streaming setting. It
 * always adjusts the "size" of the transaction selected above (which may be
 * a subtransaction) and additionally the "total_size" of its toplevel
 * transaction. The preceding paragraph may be out of date -- confirm.
 *
 * Parameters:
 *	change   - the change being accounted for; may be NULL if txn is given
 *	txn      - the transaction to charge; if NULL, change->txn is used
 *	addition - true to add sz to the counters, false to subtract it
 *	sz       - number of bytes to add or remove; 0 makes the call a no-op
3380 */
3381static void
3383 ReorderBufferChange *change,
3384 ReorderBufferTXN *txn,
3385 bool addition, Size sz)
3386{
3387 ReorderBufferTXN *toptxn;
3388
3389 Assert(txn || change);
3390
3391 /*
3392 * Ignore tuple CID changes, because those are not evicted when reaching
3393 * memory limit. So we just don't count them, because it might easily
3394 * trigger a pointless attempt to spill.
3395 */
3396 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3397 return;
3398
 /* nothing to account for, so leave the counters and the heap untouched */
3399 if (sz == 0)
3400 return;
3401
3402 if (txn == NULL)
3403 txn = change->txn;
3404 Assert(txn != NULL);
3405
3406 /*
3407 * Update the total size in top level as well. This is later used to
3408 * compute the decoding stats.
3409 */
3410 toptxn = rbtxn_get_toptxn(txn);
3411
3412 if (addition)
3413 {
3414 Size oldsize = txn->size;
3415
3416 txn->size += sz;
3417 rb->size += sz;
3418
3419 /* Update the total size in the top transaction. */
3420 toptxn->total_size += sz;
3421
 /*
  * The node is re-inserted so the heap sees the new size. It is only
  * removed first when the old size was non-zero, which matches the
  * removal path below where a transaction whose size drops to zero is
  * not put back into the heap.
  */
3422 /* Update the max-heap */
3423 if (oldsize != 0)
3424 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3425 pairingheap_add(rb->txn_heap, &txn->txn_node);
3426 }
3427 else
3428 {
 /* subtracting more than was accounted would underflow the counters */
3429 Assert((rb->size >= sz) && (txn->size >= sz));
3430 txn->size -= sz;
3431 rb->size -= sz;
3432
3433 /* Update the total size in the top transaction. */
3434 toptxn->total_size -= sz;
3435
 /* re-insert only if the transaction still has accounted memory */
3436 /* Update the max-heap */
3437 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3438 if (txn->size != 0)
3439 pairingheap_add(rb->txn_heap, &txn->txn_node);
3440 }
3441
 /* a single transaction can never account for more than the whole buffer */
3442 Assert(txn->size <= rb->size);
3443}
3444
3445/*
3446 * Add new (relfilelocator, tid) -> (cmin, cmax) mappings.
3447 *
3448 * We do not include this change type in memory accounting, because we
3449 * keep CIDs in a separate list and do not evict them when reaching
3450 * the memory limit.
3451 */
3452void
3454 XLogRecPtr lsn, RelFileLocator locator,
3455 ItemPointerData tid, CommandId cmin,
3456 CommandId cmax, CommandId combocid)
3457{
3459 ReorderBufferTXN *txn;
3460
3461 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3462
3463 change->data.tuplecid.locator = locator;
3464 change->data.tuplecid.tid = tid;
3465 change->data.tuplecid.cmin = cmin;
3466 change->data.tuplecid.cmax = cmax;
3467 change->data.tuplecid.combocid = combocid;
3468 change->lsn = lsn;
3469 change->txn = txn;
3471
3472 dlist_push_tail(&txn->tuplecids, &change->node);
3473 txn->ntuplecids++;
3474}
3475
3476/*
3477 * Add new invalidation messages to the reorder buffer queue.
3478 */
3479static void
3481 XLogRecPtr lsn, Size nmsgs,
3483{
3484 ReorderBufferChange *change;
3485
3486 change = ReorderBufferAllocChange(rb);
3488 change->data.inval.ninvalidations = nmsgs;
3490 memcpy(change->data.inval.invalidations, msgs,
3491 sizeof(SharedInvalidationMessage) * nmsgs);
3492
3493 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3494}
3495
3496/*
3497 * A helper function for ReorderBufferAddInvalidations() and
3498 * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
3499 * messages to the **invals_out.
3500 */
3501static void
3506{
3507 if (*ninvals_out == 0)
3508 {
3512 }
3513 else
3514 {
3515 /* Enlarge the array of inval messages */
3516 *invals_out =
3518 (*ninvals_out + nmsgs_new));
3522 }
3523}
3524
3525/*
3526 * Accumulate the invalidations for executing them later.
3527 *
3528 * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
3529 * accumulates all the invalidation messages in the toplevel transaction, if
3530 * available, otherwise in the current transaction, as well as in the form of
3531 * a change in the reorder buffer. We need to record it in the form of a change
3532 * so that we can execute only the required invalidations instead of executing
3533 * all the invalidations on each CommandId increment. We also need to
3534 * accumulate these in the txn buffer because in some cases where we skip
3535 * processing the transaction (see ReorderBufferForget), we need to execute
3536 * all the invalidations together.
3537 */
3538void
3540 XLogRecPtr lsn, Size nmsgs,
3542{
3543 ReorderBufferTXN *txn;
3544 MemoryContext oldcontext;
3545
3546 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3547
3548 oldcontext = MemoryContextSwitchTo(rb->context);
3549
3550 /*
3551 * Collect all the invalidations under the top transaction, if available,
3552 * so that we can execute them all together. See comments atop this
3553 * function.
3554 */
3555 txn = rbtxn_get_toptxn(txn);
3556
3557 Assert(nmsgs > 0);
3558
3560 &txn->ninvalidations,
3561 msgs, nmsgs);
3562
3563 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3564
3565 MemoryContextSwitchTo(oldcontext);
3566}
3567
3568/*
3569 * Accumulate the invalidations distributed by other committed transactions
3570 * for executing them later.
3571 *
3572 * This function is similar to ReorderBufferAddInvalidations() but stores
3573 * the given inval messages to the txn->invalidations_distributed with the
3574 * overflow check.
3575 *
3576 * This needs to be called by committed transactions to distribute their
3577 * inval messages to in-progress transactions.
3578 */
3579void
3581 XLogRecPtr lsn, Size nmsgs,
3583{
3584 ReorderBufferTXN *txn;
3585 MemoryContext oldcontext;
3586
3587 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3588
3589 oldcontext = MemoryContextSwitchTo(rb->context);
3590
3591 /*
3592 * Collect all the invalidations under the top transaction, if available,
3593 * so that we can execute them all together. See comments atop
3594 * ReorderBufferAddInvalidations().
3595 */
3596 txn = rbtxn_get_toptxn(txn);
3597
3598 Assert(nmsgs > 0);
3599
3601 {
3602 /*
3603 * Check the transaction has enough space for storing distributed
3604 * invalidation messages.
3605 */
3607 {
3608 /*
3609 * Mark the invalidation message as overflowed and free up the
3610 * messages accumulated so far.
3611 */
3613
3615 {
3619 }
3620 }
3621 else
3624 msgs, nmsgs);
3625 }
3626
3627 /* Queue the invalidation messages into the transaction */
3628 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3629
3630 MemoryContextSwitchTo(oldcontext);
3631}
3632
3633/*
3634 * Apply all invalidations we know. Possibly we only need parts at this point
3635 * in the changestream but we don't know which those are.
 *
 * The messages are visited by index, from 0 up to (but excluding) nmsgs.
 *
 * NOTE(review): the signature line and the loop body are not visible in
 * this listing, so the per-message action cannot be described from here.
3636 */
3637static void
3639{
3640 int i;
3641
3642 for (i = 0; i < nmsgs; i++)
3644}
3645
3646/*
3647 * Mark a transaction as containing catalog changes
 *
 * The transaction for 'xid' is obtained through ReorderBufferTXNByXid().
 * If it is not yet flagged as having catalog changes, it is appended to
 * rb->catchange_txns.  When the transaction is a subtransaction, the same
 * is done for its top-level transaction.
 *
 * NOTE(review): the signature line and the statements that presumably set
 * the flag itself are not visible in this listing.
3648 */
3649void
3651 XLogRecPtr lsn)
3652{
3653 ReorderBufferTXN *txn;
3654
3655 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3656
 /* Only enlist the transaction once; the list must not hold duplicates. */
3657 if (!rbtxn_has_catalog_changes(txn))
3658 {
3660 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3661 }
3662
3663 /*
3664 * Mark top-level transaction as having catalog changes too if one of its
3665 * children has so that the ReorderBufferBuildTupleCidHash can
3666 * conveniently check just top-level transaction and decide whether to
3667 * build the hash table or not.
3668 */
3669 if (rbtxn_is_subtxn(txn))
3670 {
3671 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3672
3673 if (!rbtxn_has_catalog_changes(toptxn))
3674 {
3676 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3677 }
3678 }
3679}
3680
3681/*
3682 * Return palloc'ed array of the transactions that have changed catalogs.
3683 * The returned array is sorted in xidComparator order.
3684 *
3685 * The caller must free the returned array when done with it.
 *
 * Returns NULL, without allocating anything, when rb->catchange_txns is
 * empty.  Otherwise the array has exactly dclist_count(&rb->catchange_txns)
 * entries, one XID per list member.
 *
 * NOTE(review): the return type/signature lines and the declaration of the
 * per-iteration 'txn' variable are not visible in this listing.
3686 */
3689{
3690 dlist_iter iter;
3691 TransactionId *xids = NULL;
3692 size_t xcnt = 0;
3693
3694 /* Quick return if the list is empty */
3695 if (dclist_count(&rb->catchange_txns) == 0)
3696 return NULL;
3697
3698 /* Initialize XID array */
3699 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3700 dclist_foreach(iter, &rb->catchange_txns)
3701 {
3703 catchange_node,
3704 iter.cur);
3705
3707
3708 xids[xcnt++] = txn->xid;
3709 }
3710
 /* The list is in insertion order; callers are promised sorted output. */
3711 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3712
3713 Assert(xcnt == dclist_count(&rb->catchange_txns));
3714 return xids;
3715}
3716
3717/*
3718 * Query whether a transaction is already *known* to contain catalog
3719 * changes. This can be wrong until directly before the commit!
 *
 * Returns false when ReorderBufferTXNByXid() finds no entry for 'xid';
 * otherwise returns the result of rbtxn_has_catalog_changes() for it.
 *
 * NOTE(review): the signature line is not visible in this listing.
3720 */
3721bool
3723{
3724 ReorderBufferTXN *txn;
3725
3726 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3727 false);
3728 if (txn == NULL)
3729 return false;
3730
3731 return rbtxn_has_catalog_changes(txn);
3732}
3733
3734/*
3735 * ReorderBufferXidHasBaseSnapshot
3736 * Have we already set the base snapshot for the given txn/subtxn?
 *
 * Returns false when the transaction is unknown.  For a known
 * subtransaction the answer is taken from the transaction registered under
 * its toplevel_xid; otherwise from the transaction itself.
 *
 * NOTE(review): the signature line is not visible in this listing.  The
 * second lookup's result is dereferenced without a NULL check, so the code
 * relies on the top-level entry existing whenever a subtransaction is
 * known -- confirm against ReorderBufferTXNByXid().
3737 */
3738bool
3740{
3741 ReorderBufferTXN *txn;
3742
3743 txn = ReorderBufferTXNByXid(rb, xid, false,
3744 NULL, InvalidXLogRecPtr, false);
3745
3746 /* transaction isn't known yet, ergo no snapshot */
3747 if (txn == NULL)
3748 return false;
3749
3750 /* a known subtxn? operate on top-level txn instead */
3751 if (rbtxn_is_known_subxact(txn))
3752 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3753 NULL, InvalidXLogRecPtr, false);
3754
3755 return txn->base_snapshot != NULL;
3756}
3757
3758
3759/*
3760 * ---------------------------------------
3761 * Disk serialization support
3762 * ---------------------------------------
3763 */
3764
3765/*
3766 * Ensure the IO buffer is >= sz.
 *
 * On first use (rb->outbufsize is zero) the buffer is allocated in
 * rb->context.  Later calls enlarge it with repalloc() only when the
 * requested size exceeds the current one; the buffer is never shrunk.
 * Because repalloc() may move the buffer, callers must re-read rb->outbuf
 * after calling this.
 *
 * NOTE(review): the signature line is not visible in this listing.
3767 */
3768static void
3770{
3771 if (!rb->outbufsize)
3772 {
3773 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3774 rb->outbufsize = sz;
3775 }
3776 else if (rb->outbufsize < sz)
3777 {
3778 rb->outbuf = repalloc(rb->outbuf, sz);
3779 rb->outbufsize = sz;
3780 }
3781}
3782
3783
3784/* Compare two transactions by size */
/*
 * Returns -1 when ta->size is smaller than tb->size, 1 when it is larger
 * and 0 when both are equal.
 *
 * NOTE(review): the signature line and the declarations of 'ta' and 'tb'
 * are not visible in this listing.
 */
3785static int
3787{
3790
3791 if (ta->size < tb->size)
3792 return -1;
3793 if (ta->size > tb->size)
3794 return 1;
3795 return 0;
3796}
3797
3798/*
3799 * Find the largest transaction (toplevel or subxact) to evict (spill to disk).
 *
 * The candidate is taken from the top of rb->txn_heap.  The function
 * asserts that a transaction was found, that its size is non-zero and that
 * it does not exceed rb->size, so callers are expected to invoke it only
 * while some changes are held in memory.
 *
 * NOTE(review): the signature line and the declaration/assignment of
 * 'largest' are only partly visible in this listing.
3800 */
3801static ReorderBufferTXN *
3803{
3805
3806 /* Get the largest transaction from the max-heap */
3808 pairingheap_first(rb->txn_heap));
3809
3810 Assert(largest);
3811 Assert(largest->size > 0);
3812 Assert(largest->size <= rb->size);
3813
3814 return largest;
3815}
3816
3817/*
3818 * Find the largest streamable (and non-aborted) toplevel transaction to evict
3819 * (by streaming).
3820 *
3821 * This can be seen as an optimized version of ReorderBufferLargestTXN, which
3822 * should give us the same transaction (because we don't update memory account
3823 * for subtransaction with streaming, so it's always 0). But we can simply
3824 * iterate over the limited number of toplevel transactions that have a base
3825 * snapshot. There is no use of selecting a transaction that doesn't have base
3826 * snapshot because we don't decode such transactions. Also, we do not select
3827 * the transaction which doesn't have any streamable change.
3828 *
3829 * Note that we skip transactions that contain incomplete changes. There
3830 * is scope for optimization here such that we can select the largest
3831 * transaction which has incomplete changes. But that will make the code and
3832 * design quite complex and that might not be worth the benefit. If we plan to
3833 * stream the transactions that contain incomplete changes then we need to
3834 * find a way to partially stream/truncate the transaction changes in-memory
3835 * and build a mechanism to partially truncate the spilled files.
3836 * Additionally, whenever we partially stream the transaction we need to
3837 * maintain the last streamed lsn and next time we need to restore from that
3838 * segment and the offset in WAL. As we stream the changes from the top
3839 * transaction and restore them subtransaction wise, we need to even remember
3840 * the subxact from where we streamed the last change.
 *
 * Returns whatever 'largest' holds after the scan; a transaction is only
 * stored there when its total_size is greater than zero and it passed the
 * exclusion checks.
 *
 * NOTE(review): the signature line, the declaration of 'largest', one
 * assertion and one of the exclusion conditions are not visible in this
 * listing.
3841 */
3842static ReorderBufferTXN *
3844{
3845 dlist_iter iter;
3846 Size largest_size = 0;
3848
3849 /* Find the largest top-level transaction having a base snapshot. */
3850 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3851 {
3852 ReorderBufferTXN *txn;
3853
3854 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3855
3856 /* must not be a subtxn */
3858 /* base_snapshot must be set */
3859 Assert(txn->base_snapshot != NULL);
3860
3861 /* Don't consider these kinds of transactions for eviction. */
3862 if (rbtxn_has_partial_change(txn) ||
3864 rbtxn_is_aborted(txn))
3865 continue;
3866
3867 /* Find the largest of the eviction candidates. */
3868 if ((largest == NULL || txn->total_size > largest_size) &&
3869 (txn->total_size > 0))
3870 {
3871 largest = txn;
3872 largest_size = txn->total_size;
3873 }
3874 }
3875
3876 return largest;
3877}
3878
3879/*
3880 * Check whether the logical_decoding_work_mem limit was reached, and if yes
3881 * pick the largest (sub)transaction at-a-time to evict and spill its changes to
3882 * disk or send to the output plugin until we reach under the memory limit.
3883 *
3884 * If debug_logical_replication_streaming is set to "immediate", stream or
3885 * serialize the changes immediately.
3886 *
3887 * XXX At this point we select the transactions until we reach under the memory
3888 * limit, but we might also adopt a more elaborate eviction strategy - for example
3889 * evicting enough transactions to free certain fraction (e.g. 50%) of the memory
3890 * limit.
 *
 * logical_decoding_work_mem is multiplied by 1024 before being compared
 * with rb->size, i.e. it is treated as a kilobyte count.
 *
 * NOTE(review): the signature line, several conditions and the calls that
 * perform the actual streaming/serialization are not visible in this
 * listing.
3891 */
3892static void
3894{
3895 ReorderBufferTXN *txn;
 /* cleared once an eviction has happened; see the end of the loop body */
3896 bool update_stats = true;
3897
3898 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3899 {
3900 /*
3901 * Update the statistics as the memory usage has reached the limit. We
3902 * report the statistics update later in this function since we can
3903 * update the slot statistics altogether while streaming or
3904 * serializing transactions in most cases.
3905 */
3906 rb->memExceededCount += 1;
3907 }
3909 {
3910 /*
3911 * Bail out if debug_logical_replication_streaming is buffered and we
3912 * haven't exceeded the memory limit.
3913 */
3914 return;
3915 }
3916
3917 /*
3918 * If debug_logical_replication_streaming is immediate, loop until there's
3919 * no change. Otherwise, loop until we reach under the memory limit. One
3920 * might think that just by evicting the largest (sub)transaction we will
3921 * come under the memory limit based on assumption that the selected
3922 * transaction is at least as large as the most recent change (which
3923 * caused us to go over the memory limit). However, that is not true
3924 * because a user can reduce the logical_decoding_work_mem to a smaller
3925 * value before the most recent change.
3926 */
3927 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3929 rb->size > 0))
3930 {
3931 /*
3932 * Pick the largest non-aborted transaction and evict it from memory
3933 * by streaming, if possible. Otherwise, spill to disk.
3934 */
3937 {
3938 /* we know there has to be one, because the size is not zero */
3939 Assert(txn && rbtxn_is_toptxn(txn));
3940 Assert(txn->total_size > 0);
3941 Assert(rb->size >= txn->total_size);
3942
3943 /* skip the transaction if aborted */
3945 continue;
3946
3948 }
3949 else
3950 {
3951 /*
3952 * Pick the largest transaction (or subtransaction) and evict it
3953 * from memory by serializing it to disk.
3954 */
3956
3957 /* we know there has to be one, because the size is not zero */
3958 Assert(txn);
3959 Assert(txn->size > 0);
3960 Assert(rb->size >= txn->size);
3961
3962 /* skip the transaction if aborted */
3964 continue;
3965
3967 }
3968
3969 /*
3970 * After eviction, the transaction should have no entries in memory,
3971 * and should use 0 bytes for changes.
3972 */
3973 Assert(txn->size == 0);
3974 Assert(txn->nentries_mem == 0);
3975
3976 /*
3977 * We've reported the memExceededCount update while streaming or
3978 * serializing the transaction.
3979 */
3980 update_stats = false;
3981 }
3982
 /* No eviction reported the statistics for us, so do it here. */
3983 if (update_stats)
3985
3986 /* We must be under the memory limit now. */
3987 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3988}
3989
3990/*
3991 * Spill data of a large transaction (and its subtransactions) to disk.
 *
 * Every change is written with ReorderBufferSerializeChange(), removed
 * from its list and released with ReorderBufferFreeChange().  A new spill
 * file is opened whenever none is open yet or the (not visible) segment
 * condition holds; failure to open it raises an ERROR.  Afterwards the
 * memory accounting is reduced by the transaction's size as sampled on
 * entry, and the spill statistics are updated if at least one change was
 * written.
 *
 * NOTE(review): the signature line, several declarations, the loop
 * headers, the file-closing calls and a few other statements are not
 * visible in this listing.
3992 */
3993static void
3995{
3998 int fd = -1;
4000 Size spilled = 0;
 /* sampled before any change is freed; used for accounting and stats */
4001 Size size = txn->size;
4002
4003 elog(DEBUG2, "spill %u changes in XID %u to disk",
4004 (uint32) txn->nentries_mem, txn->xid);
4005
4006 /* do the same to all child TXs */
4008 {
4010
4013 }
4014
4015 /* serialize changestream */
4017 {
4018 ReorderBufferChange *change;
4019
4020 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4021
4022 /*
4023 * store in segment in which it belongs by start lsn, don't split over
4024 * multiple segments though
4025 */
4026 if (fd == -1 ||
4028 {
4029 char path[MAXPGPATH];
4030
4031 if (fd != -1)
4033
4035
4036 /*
4037 * No need to care about TLIs here, only used during a single run,
4038 * so each LSN only maps to a specific WAL record.
4039 */
4041 curOpenSegNo);
4042
4043 /* open segment, create it if necessary */
4044 fd = OpenTransientFile(path,
4046
4047 if (fd < 0)
4048 ereport(ERROR,
4050 errmsg("could not open file \"%s\": %m", path)));
4051 }
4052
4053 ReorderBufferSerializeChange(rb, txn, fd, change);
4054 dlist_delete(&change->node);
4055 ReorderBufferFreeChange(rb, change, false);
4056
4057 spilled++;
4058 }
4059
4060 /* Update the memory counter */
4061 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4062
4063 /* update the statistics iff we have spilled anything */
4064 if (spilled)
4065 {
4066 rb->spillCount += 1;
4067 rb->spillBytes += size;
4068
4069 /* don't consider already serialized transactions */
4070 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4071
4072 /* update the decoding stats */
4074 }
4075
4076 Assert(spilled == txn->nentries_mem);
4078 txn->nentries_mem = 0;
4080
4081 if (fd != -1)
4083}
4084
4085/*
4086 * Serialize individual change to disk.
 *
 * The record is assembled in rb->outbuf: a ReorderBufferDiskChange header
 * holding a verbatim copy of the in-memory change, followed by an
 * action-specific payload (tuple headers and data, message prefix and
 * body, invalidation messages, snapshot with its xip/subxip arrays, or
 * truncated relation OIDs).  The total length is stored in ondisk->size
 * and the whole record is written to 'fd' with a single write() call; a
 * short or failed write raises an ERROR.  On success txn->final_lsn is
 * advanced to change->lsn if that is larger.
 *
 * Since growing rb->outbuf may move it, 'ondisk' is re-derived from
 * rb->outbuf in every branch that enlarges the buffer.
 *
 * NOTE(review): the signature line, the declaration/initialization of
 * 'ondisk' and 'sz', all 'case' labels, the buffer-reserve calls and a few
 * other statements are not visible in this listing.
4087 */
4088static void
4090 int fd, ReorderBufferChange *change)
4091{
4094
4096
4097 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4098 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4099
4100 switch (change->action)
4101 {
4102 /* fall through these, they're all similar enough */
4107 {
4108 char *data;
4110 newtup;
4111 Size oldlen = 0;
4112 Size newlen = 0;
4113
4114 oldtup = change->data.tp.oldtuple;
4115 newtup = change->data.tp.newtuple;
4116
4117 if (oldtup)
4118 {
4119 sz += sizeof(HeapTupleData);
4120 oldlen = oldtup->t_len;
4121 sz += oldlen;
4122 }
4123
4124 if (newtup)
4125 {
4126 sz += sizeof(HeapTupleData);
4127 newlen = newtup->t_len;
4128 sz += newlen;
4129 }
4130
4131 /* make sure we have enough space */
4133
4134 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4135 /* might have been reallocated above */
4136 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4137
 /*
  * NOTE(review): the copies below are guarded by the tuple length,
  * whereas the size accounting above is guarded by the pointer; a
  * present tuple with t_len == 0 would be counted but not written.
  * Presumably that cannot occur -- confirm.
  */
4138 if (oldlen)
4139 {
4140 memcpy(data, oldtup, sizeof(HeapTupleData));
4141 data += sizeof(HeapTupleData);
4142
4143 memcpy(data, oldtup->t_data, oldlen);
4144 data += oldlen;
4145 }
4146
4147 if (newlen)
4148 {
4149 memcpy(data, newtup, sizeof(HeapTupleData));
4150 data += sizeof(HeapTupleData);
4151
4152 memcpy(data, newtup->t_data, newlen);
4153 data += newlen;
4154 }
4155 break;
4156 }
4158 {
4159 char *data;
 /* the prefix is stored including its terminating NUL byte */
4160 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4161
4162 sz += prefix_size + change->data.msg.message_size +
4163 sizeof(Size) + sizeof(Size);
4165
4166 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4167
4168 /* might have been reallocated above */
4169 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4170
4171 /* write the prefix including the size */
4172 memcpy(data, &prefix_size, sizeof(Size));
4173 data += sizeof(Size);
4174 memcpy(data, change->data.msg.prefix,
4175 prefix_size);
4176 data += prefix_size;
4177
4178 /* write the message including the size */
4179 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4180 data += sizeof(Size);
4181 memcpy(data, change->data.msg.message,
4182 change->data.msg.message_size);
4183 data += change->data.msg.message_size;
4184
4185 break;
4186 }
4188 {
4189 char *data;
4191 change->data.inval.ninvalidations;
4192
4193 sz += inval_size;
4194
4196 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4197
4198 /* might have been reallocated above */
4199 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4201 data += inval_size;
4202
4203 break;
4204 }
4206 {
4207 Snapshot snap;
4208 char *data;
4209
4210 snap = change->data.snapshot;
4211
4212 sz += sizeof(SnapshotData) +
4213 sizeof(TransactionId) * snap->xcnt +
4214 sizeof(TransactionId) * snap->subxcnt;
4215
4216 /* make sure we have enough space */
4218 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4219 /* might have been reallocated above */
4220 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4221
 /* the struct is copied as-is; the xip/subxip arrays follow it */
4222 memcpy(data, snap, sizeof(SnapshotData));
4223 data += sizeof(SnapshotData);
4224
4225 if (snap->xcnt)
4226 {
4227 memcpy(data, snap->xip,
4228 sizeof(TransactionId) * snap->xcnt);
4229 data += sizeof(TransactionId) * snap->xcnt;
4230 }
4231
4232 if (snap->subxcnt)
4233 {
4234 memcpy(data, snap->subxip,
4235 sizeof(TransactionId) * snap->subxcnt);
4236 data += sizeof(TransactionId) * snap->subxcnt;
4237 }
4238 break;
4239 }
4241 {
4242 Size size;
4243 char *data;
4244
4245 /* account for the OIDs of truncated relations */
4246 size = sizeof(Oid) * change->data.truncate.nrelids;
4247 sz += size;
4248
4249 /* make sure we have enough space */
4251
4252 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4253 /* might have been reallocated above */
4254 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4255
4256 memcpy(data, change->data.truncate.relids, size);
4257 data += size;
4258
4259 break;
4260 }
4265 /* ReorderBufferChange contains everything important */
4266 break;
4267 }
4268
4269 ondisk->size = sz;
4270
 /* cleared so that a short write that leaves errno untouched is detectable */
4271 errno = 0;
4273 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4274 {
 /* preserve errno across the (not visible) cleanup that follows */
4275 int save_errno = errno;
4276
4278
4279 /* if write didn't set errno, assume problem is no disk space */
4281 ereport(ERROR,
4283 errmsg("could not write to data file for XID %u: %m",
4284 txn->xid)));
4285 }
4287
4288 /*
4289 * Keep the transaction's final_lsn up to date with each change we send to
4290 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4291 * only do this on commit and abort records, but that doesn't work if a
4292 * system crash leaves a transaction without its abort record).
4293 *
4294 * Make sure not to move it backwards.
4295 */
4296 if (txn->final_lsn < change->lsn)
4297 txn->final_lsn = change->lsn;
4298
4299 Assert(ondisk->change.action == change->action);
4300}
4301
4302/* Returns true, if the output plugin supports streaming, false, otherwise. */
/*
 * The answer is the 'streaming' field of the LogicalDecodingContext stored
 * in rb->private_data.
 *
 * NOTE(review): the signature line is not visible in this listing.
 */
4303static inline bool
4305{
4306 LogicalDecodingContext *ctx = rb->private_data;
4307
4308 return ctx->streaming;
4309}
4310
4311/* Returns true, if the streaming can be started now, false, otherwise. */
/*
 * Streaming may start only when both visible requirements hold: the
 * consistency check on the snapshot builder passes, and
 * SnapBuildXactNeedsSkip() returns false for the reader's current
 * ReadRecPtr.
 *
 * NOTE(review): the signature line, the consistency condition and the
 * first half of the second condition are not visible in this listing.
 */
4312static inline bool
4314{
4315 LogicalDecodingContext *ctx = rb->private_data;
4316 SnapBuild *builder = ctx->snapshot_builder;
4317
4318 /* We can't start streaming unless a consistent state is reached. */
4320 return false;
4321
4322 /*
4323 * We can't start streaming immediately even if the streaming is enabled
4324 * because we previously decoded this transaction and now just are
4325 * restarting.
4326 */
4328 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4329 return true;
4330
4331 return false;
4332}
4333
4334/*
4335 * Send data of a large transaction (and its subtransactions) to the
4336 * output plugin, but using the stream API.
 *
 * Must be called for a top-level transaction only (asserted).  A snapshot
 * is prepared first: on the first run it is copied from the base snapshot
 * with FirstCommandId; on later runs it is copied from the snapshot and
 * command ID saved by the previous run.  If the transaction has no base
 * snapshot on the first run, the function returns without doing anything.
 * The changes are then handed to ReorderBufferProcessTXN(), and only after
 * that call returns are the streaming statistics updated.
 *
 * NOTE(review): the signature line, several assertions, the subxact loop
 * body and a few other statements are not visible in this listing.
4337 */
4338static void
4340{
4341 Snapshot snapshot_now;
4342 CommandId command_id;
4343 Size stream_bytes;
4344 bool txn_is_streamed;
4345
4346 /* We can never reach here for a subtransaction. */
4347 Assert(rbtxn_is_toptxn(txn));
4348
4349 /*
4350 * We can't make any assumptions about base snapshot here, similar to what
4351 * ReorderBufferCommit() does. That relies on base_snapshot getting
4352 * transferred from subxact in ReorderBufferCommitChild(), but that was
4353 * not yet called as the transaction is in-progress.
4354 *
4355 * So just walk the subxacts and use the same logic here. But we only need
4356 * to do that once, when the transaction is streamed for the first time.
4357 * After that we need to reuse the snapshot from the previous run.
4358 *
4359 * Unlike DecodeCommit which adds xids of all the subtransactions in
4360 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4361 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4362 * allows the catalog changes made in subtransactions decoded till now to
4363 * be visible.
4364 */
4365 if (txn->snapshot_now == NULL)
4366 {
4368
4369 /* make sure this transaction is streamed for the first time */
4371
4372 /* at the beginning we should have invalid command ID */
4374
4376 {
4378
4381 }
4382
4383 /*
4384 * If this transaction has no snapshot, it didn't make any changes to
4385 * the database till now, so there's nothing to decode.
4386 */
4387 if (txn->base_snapshot == NULL)
4388 {
4389 Assert(txn->ninvalidations == 0);
4390 return;
4391 }
4392
4393 command_id = FirstCommandId;
4394 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4395 txn, command_id);
4396 }
4397 else
4398 {
4399 /* the transaction must have been already streamed */
4401
4402 /*
4403 * Nah, we already have snapshot from the previous streaming run. We
4404 * assume new subxacts can't move the LSN backwards, and so can't beat
4405 * the LSN condition in the previous branch (so no need to walk
4406 * through subxacts again). In fact, we must not do that as we may be
4407 * using snapshot half-way through the subxact.
4408 */
4409 command_id = txn->command_id;
4410
4411 /*
4412 * We can't use txn->snapshot_now directly because after the last
4413 * streaming run, we might have got some new sub-transactions. So we
4414 * need to add them to the snapshot.
4415 */
4416 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4417 txn, command_id);
4418
4419 /* Free the previously copied snapshot. */
4420 Assert(txn->snapshot_now->copied);
4422 txn->snapshot_now = NULL;
4423 }
4424
4425 /*
4426 * Remember this information to be used later to update stats. We can't
4427 * update the stats here as an error while processing the changes would
4428 * lead to the accumulation of stats even though we haven't streamed all
4429 * the changes.
4430 */
4432 stream_bytes = txn->total_size;
4433
4434 /* Process and send the changes to output plugin. */
4435 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4436 command_id, true);
4437
4438 rb->streamCount += 1;
4439 rb->streamBytes += stream_bytes;
4440
4441 /* Don't consider already streamed transaction. */
4442 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4443
4444 /* update the decoding stats */
4446
4448 Assert(txn->nentries == 0);
4449 Assert(txn->nentries_mem == 0);
4450}
4451
4452/*
4453 * Size of a change in memory.
 *
 * Starts from sizeof(ReorderBufferChange) and adds an action-specific
 * amount: header plus data length for each present tuple, prefix and
 * message length plus two Size fields for messages, the invalidation
 * message array, the snapshot struct with its xip/subxip arrays, or the
 * array of truncated relation OIDs.  The remaining actions add nothing.
 *
 * NOTE(review): the signature line, all 'case' labels and the declaration
 * of 'oldtup' are not visible in this listing.
4454 */
4455static Size
4457{
4458 Size sz = sizeof(ReorderBufferChange);
4459
4460 switch (change->action)
4461 {
4462 /* fall through these, they're all similar enough */
4467 {
4469 newtup;
4470 Size oldlen = 0;
4471 Size newlen = 0;
4472
4473 oldtup = change->data.tp.oldtuple;
4474 newtup = change->data.tp.newtuple;
4475
4476 if (oldtup)
4477 {
4478 sz += sizeof(HeapTupleData);
4479 oldlen = oldtup->t_len;
4480 sz += oldlen;
4481 }
4482
4483 if (newtup)
4484 {
4485 sz += sizeof(HeapTupleData);
4486 newlen = newtup->t_len;
4487 sz += newlen;
4488 }
4489
4490 break;
4491 }
4493 {
 /* prefix length including its terminating NUL byte */
4494 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4495
4496 sz += prefix_size + change->data.msg.message_size +
4497 sizeof(Size) + sizeof(Size);
4498
4499 break;
4500 }
4502 {
4503 sz += sizeof(SharedInvalidationMessage) *
4504 change->data.inval.ninvalidations;
4505 break;
4506 }
4508 {
4509 Snapshot snap;
4510
4511 snap = change->data.snapshot;
4512
4513 sz += sizeof(SnapshotData) +
4514 sizeof(TransactionId) * snap->xcnt +
4515 sizeof(TransactionId) * snap->subxcnt;
4516
4517 break;
4518 }
4520 {
4521 sz += sizeof(Oid) * change->data.truncate.nrelids;
4522
4523 break;
4524 }
4529 /* ReorderBufferChange contains everything important */
4530 break;
4531 }
4532
4533 return sz;
4534}
4535
4536
4537/*
4538 * Restore a number of changes spilled to disk back into memory.
 *
 * Returns the number of changes restored by this call.  The loop stops
 * once max_changes_in_memory changes have been restored or *segno has
 * moved past last_segno.
 *
 * Each record is read in two steps: first the fixed-size
 * ReorderBufferDiskChange header, which carries the total record size,
 * then the remainder.  A read returning zero bytes at a record boundary
 * is treated as end of file and advances to the next segment; a segment
 * file that does not exist (ENOENT) is skipped the same way.  Any other
 * open failure, read error or short read raises an ERROR.
 *
 * file->curOffset tracks the read position within the current file and is
 * reset to zero whenever a new file is opened.
 *
 * NOTE(review): the signature line, several declarations, the cleanup
 * loop header, the file-open call and the buffer-reserve calls are not
 * visible in this listing.
4539 */
4540static Size
4542 TXNEntryFile *file, XLogSegNo *segno)
4543{
4544 Size restored = 0;
4547 File *fd = &file->vfd;
4548
4551
4552 /* free current entries, so we have memory for more */
4554 {
4557
4558 dlist_delete(&cleanup->node);
4560 }
4561 txn->nentries_mem = 0;
4563
4565
4566 while (restored < max_changes_in_memory && *segno <= last_segno)
4567 {
4568 int readBytes;
4570
4572
4573 if (*fd == -1)
4574 {
4575 char path[MAXPGPATH];
4576
4577 /* first time in */
4578 if (*segno == 0)
4579 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4580
4581 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4582
4583 /*
4584 * No need to care about TLIs here, only used during a single run,
4585 * so each LSN only maps to a specific WAL record.
4586 */
4588 *segno);
4589
4591
4592 /* No harm in resetting the offset even in case of failure */
4593 file->curOffset = 0;
4594
 /* a missing segment file just means nothing was spilled into it */
4595 if (*fd < 0 && errno == ENOENT)
4596 {
4597 *fd = -1;
4598 (*segno)++;
4599 continue;
4600 }
4601 else if (*fd < 0)
4602 ereport(ERROR,
4604 errmsg("could not open file \"%s\": %m",
4605 path)));
4606 }
4607
4608 /*
4609 * Read the statically sized part of a change which has information
4610 * about the total size. If we couldn't read a record, we're at the
4611 * end of this file.
4612 */
4614 readBytes = FileRead(file->vfd, rb->outbuf,
4617
4618 /* eof */
4619 if (readBytes == 0)
4620 {
4621 FileClose(*fd);
4622 *fd = -1;
4623 (*segno)++;
4624 continue;
4625 }
4626 else if (readBytes < 0)
4627 ereport(ERROR,
4629 errmsg("could not read from reorderbuffer spill file: %m")));
4630 else if (readBytes != sizeof(ReorderBufferDiskChange))
4631 ereport(ERROR,
4633 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4634 readBytes,
4635 (uint32) sizeof(ReorderBufferDiskChange))));
4636
4637 file->curOffset += readBytes;
4638
4639 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4640
4642 sizeof(ReorderBufferDiskChange) + ondisk->size);
 /* the buffer may have moved, so fetch the header pointer again */
4643 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4644
 /* ondisk->size counts the header too, hence the subtraction below */
4645 readBytes = FileRead(file->vfd,
4646 rb->outbuf + sizeof(ReorderBufferDiskChange),
4647 ondisk->size - sizeof(ReorderBufferDiskChange),
4648 file->curOffset,
4650
4651 if (readBytes < 0)
4652 ereport(ERROR,
4654 errmsg("could not read from reorderbuffer spill file: %m")));
4655 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4656 ereport(ERROR,
4658 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4659 readBytes,
4660 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4661
4662 file->curOffset += readBytes;
4663
4664 /*
4665 * ok, read a full change from disk, now restore it into proper
4666 * in-memory format
4667 */
4668 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4669 restored++;
4670 }
4671
4672 return restored;
4673}
4674
4675/*
4676 * Convert change from its on-disk format to in-memory format and queue it onto
4677 * the TXN's ->changes list.
4678 *
4679 * Note: although "data" is declared char*, at entry it points to a
4680 * maxalign'd buffer, making it safe in most of this function to assume
4681 * that the pointed-to data is suitably aligned for direct access.
 *
 * A fresh ReorderBufferChange is allocated and the fixed part is copied
 * from the on-disk header.  Pointer members copied that way still hold the
 * values they had when the change was serialized, so each branch allocates
 * new storage and re-points them before the change is used.  Finally the
 * change is appended to txn->changes, txn->nentries_mem is incremented and
 * the memory accounting is updated with the change's size.
 *
 * NOTE(review): the signature line, the declaration of 'ondisk', all
 * 'case' labels, the tuple-buffer allocations, the tuple-data copies and a
 * few other statements are not visible in this listing.
4682 */
4683static void
4685 char *data)
4686{
4688 ReorderBufferChange *change;
4689
4690 ondisk = (ReorderBufferDiskChange *) data;
4691
4692 change = ReorderBufferAllocChange(rb);
4693
4694 /* copy static part */
4695 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4696
 /* skip the header; 'data' now points at the action-specific payload */
4697 data += sizeof(ReorderBufferDiskChange);
4698
4699 /* restore individual stuff */
4700 switch (change->action)
4701 {
4702 /* fall through these, they're all similar enough */
 /* the stale pointer only serves as a "tuple was present" flag here */
4707 if (change->data.tp.oldtuple)
4708 {
4709 uint32 tuplelen = ((HeapTuple) data)->t_len;
4710
4711 change->data.tp.oldtuple =
4713
4714 /* restore ->tuple */
4715 memcpy(change->data.tp.oldtuple, data,
4716 sizeof(HeapTupleData));
4717 data += sizeof(HeapTupleData);
4718
4719 /* reset t_data pointer into the new tuplebuf */
4720 change->data.tp.oldtuple->t_data =
4721 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4722
4723 /* restore tuple data itself */
4725 data += tuplelen;
4726 }
4727
4728 if (change->data.tp.newtuple)
4729 {
4730 /* here, data might not be suitably aligned! */
4732
4734 sizeof(uint32));
4735
4736 change->data.tp.newtuple =
4738
4739 /* restore ->tuple */
4740 memcpy(change->data.tp.newtuple, data,
4741 sizeof(HeapTupleData));
4742 data += sizeof(HeapTupleData);
4743
4744 /* reset t_data pointer into the new tuplebuf */
4745 change->data.tp.newtuple->t_data =
4746 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4747
4748 /* restore tuple data itself */
4750 data += tuplelen;
4751 }
4752
4753 break;
4755 {
4756 Size prefix_size;
4757
4758 /* read prefix */
4759 memcpy(&prefix_size, data, sizeof(Size));
4760 data += sizeof(Size);
4761 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4762 prefix_size);
4763 memcpy(change->data.msg.prefix, data, prefix_size);
4764 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4765 data += prefix_size;
4766
4767 /* read the message */
4768 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4769 data += sizeof(Size);
4770 change->data.msg.message = MemoryContextAlloc(rb->context,
4771 change->data.msg.message_size);
4772 memcpy(change->data.msg.message, data,
4773 change->data.msg.message_size);
4774 data += change->data.msg.message_size;
4775
4776 break;
4777 }
4779 {
4781 change->data.inval.ninvalidations;
4782
4783 change->data.inval.invalidations =
4784 MemoryContextAlloc(rb->context, inval_size);
4785
4786 /* read the message */
4788
4789 break;
4790 }
4792 {
4795 Size size;
4796
4797 oldsnap = (Snapshot) data;
4798
4799 size = sizeof(SnapshotData) +
4800 sizeof(TransactionId) * oldsnap->xcnt +
4801 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4802
4803 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4804
4805 newsnap = change->data.snapshot;
4806
4807 memcpy(newsnap, data, size);
 /*
  * The xip/subxip pointers copied from disk are stale; point them at
  * the arrays stored directly behind the struct in the new allocation.
  */
4808 newsnap->xip = (TransactionId *)
4809 (((char *) newsnap) + sizeof(SnapshotData));
4810 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4811 newsnap->copied = true;
4812 break;
4813 }
4814 /* the base struct contains all the data, easy peasy */
4816 {
4817 Oid *relids;
4818
4819 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4820 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4821 change->data.truncate.relids = relids;
4822
4823 break;
4824 }
4829 break;
4830 }
4831
4832 dlist_push_tail(&txn->changes, &change->node);
4833 txn->nentries_mem++;
4834
4835 /*
4836 * Update memory accounting for the restored change. We need to do this
4837 * although we don't check the memory limit when restoring the changes in
4838 * this branch (we only do that when initially queueing the changes after
4839 * decoding), because we will release the changes later, and that will
4840 * update the accounting too (subtracting the size from the counters). And
4841 * we don't want to underflow there.
4842 */
4844 ReorderBufferChangeSize(change));
4845}
4846
4847/*
4848 * Remove all on-disk data stored for the passed in transaction.
 *
 * Every segment number from 'first' through 'last' (inclusive) is tried.
 * A file that does not exist is silently ignored; any other unlink()
 * failure raises an ERROR.
 *
 * NOTE(review): the signature line, the statements computing 'first' and
 * 'last', and the call that builds 'path' are not visible in this listing.
4849 */
4850static void
4852{
4853 XLogSegNo first;
4854 XLogSegNo cur;
4855 XLogSegNo last;
4856
4859
4862
4863 /* iterate over all possible filenames, and delete them */
4864 for (cur = first; cur <= last; cur++)
4865 {
4866 char path[MAXPGPATH];
4867
 /* not every segment in the range necessarily has a spill file */
4869 if (unlink(path) != 0 && errno != ENOENT)
4870 ereport(ERROR,
4872 errmsg("could not remove file \"%s\": %m", path)));
4873 }
4874}
4875
4876/*
4877 * Remove any leftover serialized reorder buffers from a slot directory after a
4878 * prior crash or decoding session exit.
 *
 * 'slotname' is the name of the slot directory below PG_REPLSLOT_DIR.
 * Nothing is done when the path exists but is not a directory.  Otherwise
 * every directory entry whose name starts with "xid" is unlinked; a failed
 * unlink() raises an ERROR.
 *
 * NOTE(review): one line of the ereport() call and the statement after the
 * loop are not visible in this listing.
4879 */
4880static void
4881ReorderBufferCleanupSerializedTXNs(const char *slotname)
4882{
4883 DIR *spill_dir;
4884 struct dirent *spill_de;
4885 struct stat statbuf;
4886 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4887
 /*
  * NOTE(review): unbounded sprintf(); it relies on 'path' being sized
  * generously above.  The loop below uses snprintf() for the same buffer.
  */
4888 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4889
4890 /* we're only handling directories here, skip if it's not ours */
4891 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4892 return;
4893
4894 spill_dir = AllocateDir(path);
 /*
  * NOTE(review): 'path' is overwritten inside the loop, so after the first
  * matching entry ReadDirExtended() receives a file path rather than the
  * directory path.  Presumably that argument is only used for error
  * reporting -- confirm.
  */
4895 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4896 {
4897 /* only look at names that can be ours */
4898 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4899 {
4900 snprintf(path, sizeof(path),
4901 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4902 spill_de->d_name);
4903
4904 if (unlink(path) != 0)
4905 ereport(ERROR,
4907 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4908 path, PG_REPLSLOT_DIR, slotname)));
4909 }
4910 }
4912}
4913
4914/*
4915 * Given a replication slot, transaction ID and segment number, fill in the
4916 * corresponding spill file into 'path', which is a caller-owned buffer of size
4917 * at least MAXPGPATH.
 *
 * The file name has the form "xid-<xid>-lsn-<hi>-<lo>.spill" inside a
 * two-level directory; the output is bounded to MAXPGPATH bytes by
 * snprintf().
 *
 * NOTE(review): the signature line, the computation of 'recptr' and the
 * two directory arguments of snprintf() are not visible in this listing.
4918 */
4919static void
4921 XLogSegNo segno)
4922{
4924
4926
4927 snprintf(path, MAXPGPATH, "%s/%s/xid-%u-lsn-%X-%X.spill",
4930 xid, LSN_FORMAT_ARGS(recptr));
4931}
4932
4933/*
4934 * Delete all data spilled to disk after we've restarted/crashed. It will be
4935 * recreated when the respective slots are reused.
 *
 * Directory entries "." and ".." are skipped, as is any entry whose name
 * ReplicationSlotValidateName() rejects.
 *
 * NOTE(review): the signature line, the directory open/close calls, the
 * loop header and the per-slot cleanup call are not visible in this
 * listing.
4936 */
4937void
4939{
4941 struct dirent *logical_de;
4942
4945 {
4946 if (strcmp(logical_de->d_name, ".") == 0 ||
4947 strcmp(logical_de->d_name, "..") == 0)
4948 continue;
4949
4950 /* if it cannot be a slot, skip the directory */
4951 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4952 continue;
4953
4954 /*
4955 * ok, has to be a surviving logical slot, iterate and delete
4956 * everything starting with xid-*
4957 */
4959 }
4961}
4962
4963/* ---------------------------------------
4964 * toast reassembly support
4965 * ---------------------------------------
4966 */
4967
4968/*
4969 * Initialize per tuple toast reconstruction support.
4970 */
4971static void
4973{
4975
4976 Assert(txn->toast_hash == NULL);
4977
4978 hash_ctl.keysize = sizeof(Oid);
4979 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4980 hash_ctl.hcxt = rb->context;
4981 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4983}
4984
4985/*
4986 * Per toast-chunk handling for toast reconstruction
4987 *
4988 * Appends a toast chunk so we can reconstruct it when the tuple "owning" the
4989 * toasted Datum comes along.
4990 */
4991static void
4993 Relation relation, ReorderBufferChange *change)
4994{
4997 bool found;
4999 bool isnull;
5000 Pointer chunk;
5001 TupleDesc desc = RelationGetDescr(relation);
5002 Oid chunk_id;
5004
5005 if (txn->toast_hash == NULL)
5007
5008 Assert(IsToastRelation(relation));
5009
5010 newtup = change->data.tp.newtuple;
5011 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5012 Assert(!isnull);
5013 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5014 Assert(!isnull);
5015
5017 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5018
5019 if (!found)
5020 {
5021 Assert(ent->chunk_id == chunk_id);
5022 ent->num_chunks = 0;
5023 ent->last_chunk_seq = 0;
5024 ent->size = 0;
5025 ent->reconstructed = NULL;
5026 dlist_init(&ent->chunks);
5027
5028 if (chunk_seq != 0)
5029 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5030 chunk_seq, chunk_id);
5031 }
5032 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5033 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5034 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5035
5036 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5037 Assert(!isnull);
5038
5039 /* calculate size so we can allocate the right size at once later */
5040 if (!VARATT_IS_EXTENDED(chunk))
5041 chunksize = VARSIZE(chunk) - VARHDRSZ;
5042 else if (VARATT_IS_SHORT(chunk))
5043 /* could happen due to heap_form_tuple doing its thing */
5045 else
5046 elog(ERROR, "unexpected type of toast chunk");
5047
5048 ent->size += chunksize;
5049 ent->last_chunk_seq = chunk_seq;
5050 ent->num_chunks++;
5051 dlist_push_tail(&ent->chunks, &change->node);
5052}
5053
5054/*
5055 * Rejigger change->newtuple to point to in-memory toast tuples instead of
5056 * on-disk toast tuples that may no longer exist (think DROP TABLE or VACUUM).
5057 *
5058 * We cannot replace unchanged toast tuples though, so those will still point
5059 * to on-disk toast data.
5060 *
5061 * While updating the existing change with detoasted tuple data, we need to
5062 * update the memory accounting info, because the change size will differ.
5063 * Otherwise the accounting may get out of sync, triggering serialization
5064 * at unexpected times.
5065 *
5066 * We simply subtract size of the change before rejiggering the tuple, and
5067 * then add the new size. This makes it look like the change was removed
5068 * and then added back, except it only tweaks the accounting info.
5069 *
5070 * In particular it can't trigger serialization, which would be pointless
5071 * anyway as it happens during commit processing right before handing
5072 * the change to the output plugin.
5073 */
5074static void
5076 Relation relation, ReorderBufferChange *change)
5077{
5078 TupleDesc desc;
5079 int natt;
5080 Datum *attrs;
5081 bool *isnull;
5082 bool *free;
5084 Relation toast_rel;
5086 MemoryContext oldcontext;
5088 Size old_size;
5089
5090 /* no toast tuples changed */
5091 if (txn->toast_hash == NULL)
5092 return;
5093
5094 /*
5095 * We're going to modify the size of the change. So, to make sure the
5096 * accounting is correct we record the current change size and then after
5097 * re-computing the change we'll subtract the recorded size and then
5098 * re-add the new change size at the end. We don't immediately subtract
5099 * the old size because if there is any error before we add the new size,
5100 * we will release the changes and that will update the accounting info
5101 * (subtracting the size from the counters). And we don't want to
5102 * underflow there.
5103 */
5105
5106 oldcontext = MemoryContextSwitchTo(rb->context);
5107
5108 /* we should only have toast tuples in an INSERT or UPDATE */
5109 Assert(change->data.tp.newtuple);
5110
5111 desc = RelationGetDescr(relation);
5112
5113 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5114 if (!RelationIsValid(toast_rel))
5115 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5116 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5117
5118 toast_desc = RelationGetDescr(toast_rel);
5119
5120 /* should we allocate from stack instead? */
5121 attrs = palloc0_array(Datum, desc->natts);
5122 isnull = palloc0_array(bool, desc->natts);
5123 free = palloc0_array(bool, desc->natts);
5124
5125 newtup = change->data.tp.newtuple;
5126
5127 heap_deform_tuple(newtup, desc, attrs, isnull);
5128
5129 for (natt = 0; natt < desc->natts; natt++)
5130 {
5134
5135 /* va_rawsize is the size of the original datum -- including header */
5136 varatt_external toast_pointer;
5139 varlena *reconstructed;
5140 dlist_iter it;
5141 Size data_done = 0;
5142
5143 if (attr->attisdropped)
5144 continue;
5145
5146 /* not a varlena datatype */
5147 if (attr->attlen != -1)
5148 continue;
5149
5150 /* no data */
5151 if (isnull[natt])
5152 continue;
5153
5154 /* ok, we know we have a toast datum */
5156
5157 /* no need to do anything if the tuple isn't external */
5159 continue;
5160
5162
5163 /*
5164 * Check whether the toast tuple changed, replace if so.
5165 */
5168 &toast_pointer.va_valueid,
5169 HASH_FIND,
5170 NULL);
5171 if (ent == NULL)
5172 continue;
5173
5174 new_datum =
5176
5177 free[natt] = true;
5178
5179 reconstructed = palloc0(toast_pointer.va_rawsize);
5180
5181 ent->reconstructed = reconstructed;
5182
5183 /* stitch toast tuple back together from its parts */
5184 dlist_foreach(it, &ent->chunks)
5185 {
5186 bool cisnull;
5189 Pointer chunk;
5190
5192 ctup = cchange->data.tp.newtuple;
5194
5195 Assert(!cisnull);
5196 Assert(!VARATT_IS_EXTERNAL(chunk));
5197 Assert(!VARATT_IS_SHORT(chunk));
5198
5199 memcpy(VARDATA(reconstructed) + data_done,
5200 VARDATA(chunk),
5201 VARSIZE(chunk) - VARHDRSZ);
5202 data_done += VARSIZE(chunk) - VARHDRSZ;
5203 }
5204 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5205
5206 /* make sure it's marked as compressed or not */
5207 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5208 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5209 else
5210 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5211
5213 redirect_pointer.pointer = reconstructed;
5214
5217 sizeof(redirect_pointer));
5218
5220 }
5221
5222 /*
5223 * Build tuple in separate memory & copy tuple back into the tuplebuf
5224 * passed to the output plugin. We can't directly heap_fill_tuple() into
5225 * the tuplebuf because attrs[] will point back into the current content.
5226 */
5227 tmphtup = heap_form_tuple(desc, attrs, isnull);
5228 Assert(newtup->t_len <= MaxHeapTupleSize);
5229 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5230
5231 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5232 newtup->t_len = tmphtup->t_len;
5233
5234 /*
5235 * free resources we won't further need, more persistent stuff will be
5236 * free'd in ReorderBufferToastReset().
5237 */
5238 RelationClose(toast_rel);
5239 pfree(tmphtup);
5240 for (natt = 0; natt < desc->natts; natt++)
5241 {
5242 if (free[natt])
5244 }
5245 pfree(attrs);
5246 pfree(free);
5247 pfree(isnull);
5248
5249 MemoryContextSwitchTo(oldcontext);
5250
5251 /* subtract the old change size */
5253 /* now add the change back, with the correct size */
5255 ReorderBufferChangeSize(change));
5256}
5257
5258/*
5259 * Free all resources allocated for toast reconstruction.
5260 */
5261static void
5263{
5266
5267 if (txn->toast_hash == NULL)
5268 return;
5269
5270 /* sequentially walk over the hash and free everything */
5273 {
5275
5276 if (ent->reconstructed != NULL)
5277 pfree(ent->reconstructed);
5278
5279 dlist_foreach_modify(it, &ent->chunks)
5280 {
5281 ReorderBufferChange *change =
5283
5284 dlist_delete(&change->node);
5285 ReorderBufferFreeChange(rb, change, true);
5286 }
5287 }
5288
5290 txn->toast_hash = NULL;
5291}
5292
5293
5294/* ---------------------------------------
5295 * Visibility support for logical decoding
5296 *
5297 *
5298 * Lookup actual cmin/cmax values when using decoding snapshot. We can't
5299 * always rely on stored cmin/cmax values because of two scenarios:
5300 *
5301 * * A tuple got changed multiple times during a single transaction and thus
5302 * has got a combo CID. Combo CIDs are only valid for the duration of a
5303 * single transaction.
5304 * * A tuple with a cmin but no cmax (and thus no combo CID) got
5305 * deleted/updated in another transaction than the one which created it
5306 * which we are looking at right now. As only one of cmin, cmax or combo CID
5307 * is actually stored in the heap we don't have access to the value we
5308 * need anymore.
5309 *
5310 * To resolve those problems we have a per-transaction hash of (cmin,
5311 * cmax) tuples keyed by (relfilelocator, ctid) which contains the actual
5312 * (cmin, cmax) values. That also takes care of combo CIDs by simply
5313 * not caring about them at all. As we have the real cmin/cmax values
5314 * combo CIDs aren't interesting.
5315 *
5316 * As we only care about catalog tuples here the overhead of this
5317 * hashtable should be acceptable.
5318 *
5319 * Heap rewrites complicate this a bit, check rewriteheap.c for
5320 * details.
5321 * -------------------------------------------------------------------------
5322 */
5323
5324/* struct for sorting mapping files by LSN efficiently */
5325typedef struct RewriteMappingFile
5326{
5328 char fname[MAXPGPATH];
5330
5331#ifdef NOT_USED
5332static void
5334{
5337
5340 {
5341 elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
5342 ent->key.rlocator.dbOid,
5343 ent->key.rlocator.spcOid,
5344 ent->key.rlocator.relNumber,
5345 ItemPointerGetBlockNumber(&ent->key.tid),
5347 ent->cmin,
5348 ent->cmax
5349 );
5350 }
5351}
5352#endif
5353
5354/*
5355 * Apply a single mapping file to tuplecid_data.
5356 *
5357 * The mapping file has to have been verified to be a) committed b) for our
5358 * transaction c) applied in LSN order.
5359 */
5360static void
5361ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
5362{
5363 char path[MAXPGPATH];
5364 int fd;
5365 int readBytes;
5367
5368 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5370 if (fd < 0)
5371 ereport(ERROR,
5373 errmsg("could not open file \"%s\": %m", path)));
5374
5375 while (true)
5376 {
5380 bool found;
5381
5382 /* be careful about padding */
5383 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5384
5385 /* read all mappings till the end of the file */
5387 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5389
5390 if (readBytes < 0)
5391 ereport(ERROR,
5393 errmsg("could not read file \"%s\": %m",
5394 path)));
5395 else if (readBytes == 0) /* EOF */
5396 break;
5397 else if (readBytes != sizeof(LogicalRewriteMappingData))
5398 ereport(ERROR,
5400 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5401 path, readBytes,
5402 (int32) sizeof(LogicalRewriteMappingData))));
5403
5404 key.rlocator = map.old_locator;
5406 &key.tid);
5407
5408
5411
5412 /* no existing mapping, no need to update */
5413 if (!ent)
5414 continue;
5415
5416 key.rlocator = map.new_locator;
5418 &key.tid);
5419
5421 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5422
5423 if (found)
5424 {
5425 /*
5426 * Make sure the existing mapping makes sense. We sometimes update
5427 * old records that did not yet have a cmax (e.g. pg_class' own
5428 * entry while rewriting it) during rewrites, so allow that.
5429 */
5430 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5431 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5432 }
5433 else
5434 {
5435 /* update mapping */
5436 new_ent->cmin = ent->cmin;
5437 new_ent->cmax = ent->cmax;
5438 new_ent->combocid = ent->combocid;
5439 }
5440 }
5441
5442 if (CloseTransientFile(fd) != 0)
5443 ereport(ERROR,
5445 errmsg("could not close file \"%s\": %m", path)));
5446}
5447
5448
5449/*
5450 * Check whether the TransactionId 'xid' is in the pre-sorted array 'xip'.
5451 */
5452static bool
5454{
5455 return bsearch(&xid, xip, num,
5456 sizeof(TransactionId), xidComparator) != NULL;
5457}
5458
5459/*
5460 * list_sort() comparator for sorting RewriteMappingFiles in LSN order.
5461 */
5462static int
5464{
5467
5468 return pg_cmp_u64(a->lsn, b->lsn);
5469}
5470
5471/*
5472 * Apply any existing logical remapping files if there are any targeted at our
5473 * transaction for relid.
5474 */
5475static void
5477{
5479 struct dirent *mapping_de;
5480 List *files = NIL;
5481 ListCell *file;
5482 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5483
5486 {
5487 Oid f_dboid;
5488 Oid f_relid;
5492 uint32 f_hi,
5493 f_lo;
5495
5496 if (strcmp(mapping_de->d_name, ".") == 0 ||
5497 strcmp(mapping_de->d_name, "..") == 0)
5498 continue;
5499
5500 /* Ignore files that aren't ours */
5501 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5502 continue;
5503
5505 &f_dboid, &f_relid, &f_hi, &f_lo,
5506 &f_mapped_xid, &f_create_xid) != 6)
5507 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5508
5509 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5510
5511 /* mapping for another database */
5512 if (f_dboid != dboid)
5513 continue;
5514
5515 /* mapping for another relation */
5516 if (f_relid != relid)
5517 continue;
5518
5519 /* did the creating transaction abort? */
5521 continue;
5522
5523 /* not for our transaction */
5524 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5525 continue;
5526
5527 /* ok, relevant, queue for apply */
5529 f->lsn = f_lsn;
5530 strcpy(f->fname, mapping_de->d_name);
5531 files = lappend(files, f);
5532 }
5534
5535 /* sort files so we apply them in LSN order */
5537
5538 foreach(file, files)
5539 {
5541
5542 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5543 snapshot->subxip[0]);
5545 pfree(f);
5546 }
5547}
5548
5549/*
5550 * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on
5551 * combo CIDs.
5552 */
5553bool
5555 Snapshot snapshot,
5556 HeapTuple htup, Buffer buffer,
5557 CommandId *cmin, CommandId *cmax)
5558{
5561 ForkNumber forkno;
5562 BlockNumber blockno;
5563 bool updated_mapping = false;
5564
5565 /*
5566 * Return unresolved if tuplecid_data is not valid. That's because when
5567 * streaming in-progress transactions we may run into tuples with the CID
5568 * before actually decoding them. Think e.g. about INSERT followed by
5569 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5570 * INSERT. So in such cases, we assume the CID is from the future
5571 * command.
5572 */
5573 if (tuplecid_data == NULL)
5574 return false;
5575
5576 /* be careful about padding */
5577 memset(&key, 0, sizeof(key));
5578
5579 Assert(!BufferIsLocal(buffer));
5580
5581 /*
5582 * get relfilelocator from the buffer, no convenient way to access it
5583 * other than that.
5584 */
5585 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5586
5587 /* tuples can only be in the main fork */
5588 Assert(forkno == MAIN_FORKNUM);
5589 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5590
5591 ItemPointerCopy(&htup->t_self,
5592 &key.tid);
5593
5594restart:
5597
5598 /*
5599 * failed to find a mapping, check whether the table was rewritten and
5600 * apply mapping if so, but only do that once - there can be no new
5601 * mappings while we are in here since we have to hold a lock on the
5602 * relation.
5603 */
5604 if (ent == NULL && !updated_mapping)
5605 {
5607 /* now check but don't update for a mapping again */
5608 updated_mapping = true;
5609 goto restart;
5610 }
5611 else if (ent == NULL)
5612 return false;
5613
5614 if (cmin)
5615 *cmin = ent->cmin;
5616 if (cmax)
5617 *cmax = ent->cmax;
5618 return true;
5619}
5620
5621/*
5622 * Count invalidation messages of specified transaction.
5623 *
5624 * Returns number of messages, and msgs is set to the pointer of the linked
5625 * list for the messages.
5626 */
5627uint32
5630{
5631 ReorderBufferTXN *txn;
5632
5633 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5634 false);
5635
5636 if (txn == NULL)
5637 return 0;
5638
5639 *msgs = txn->invalidations;
5640
5641 return txn->ninvalidations;
5642}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
uint32 BlockNumber
Definition block.h:31
static int32 next
Definition blutils.c:225
static void cleanup(void)
Definition bootstrap.c:879
int Buffer
Definition buf.h:23
#define BufferIsLocal(buffer)
Definition buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4378
#define NameStr(name)
Definition c.h:837
#define InvalidCommandId
Definition c.h:755
#define VARHDRSZ
Definition c.h:783
#define Assert(condition)
Definition c.h:945
#define PG_BINARY
Definition c.h:1376
#define FirstCommandId
Definition c.h:754
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
#define unlikely(x)
Definition c.h:432
uint32_t uint32
Definition c.h:618
#define pg_fallthrough
Definition c.h:152
void * Pointer
Definition c.h:609
uint32 CommandId
Definition c.h:752
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
bool IsToastRelation(Relation relation)
Definition catalog.c:206
bool IsSharedRelation(Oid relationId)
Definition catalog.c:304
int64 TimestampTz
Definition timestamp.h:39
#define INDIRECT_POINTER_SIZE
Definition detoast.h:34
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition detoast.h:22
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
struct cursor * cur
Definition ecpg.c:29
Datum arg
Definition elog.c:1322
void FreeErrorData(ErrorData *edata)
Definition elog.c:2013
int errcode_for_file_access(void)
Definition elog.c:897
ErrorData * CopyErrorData(void)
Definition elog.c:1941
void FlushErrorState(void)
Definition elog.c:2062
#define PG_RE_THROW()
Definition elog.h:405
#define DEBUG3
Definition elog.h:28
#define PG_TRY(...)
Definition elog.h:372
#define DEBUG2
Definition elog.h:29
#define PG_END_TRY(...)
Definition elog.h:397
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define PG_CATCH(...)
Definition elog.h:382
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
struct SnapshotData * Snapshot
Definition execnodes.h:59
int FreeDir(DIR *dir)
Definition fd.c:3009
int CloseTransientFile(int fd)
Definition fd.c:2855
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition fd.c:2972
void FileClose(File file)
Definition fd.c:1966
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1563
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
static ssize_t FileRead(File file, void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:225
int File
Definition fd.h:51
#define palloc_object(type)
Definition fe_memutils.h:74
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition generation.c:162
Oid MyDatabaseId
Definition globals.c:94
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1037
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition heaptuple.c:1266
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_CONTEXT
Definition hsearch.h:102
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
#define HEAPTUPLESIZE
Definition htup.h:73
HeapTupleData * HeapTuple
Definition htup.h:71
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
#define SizeofHeapTupleHeader
#define MaxHeapTupleSize
static Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static bool dlist_has_next(const dlist_head *head, const dlist_node *node)
Definition ilist.h:503
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition ilist.h:709
static void dlist_insert_before(dlist_node *before, dlist_node *node)
Definition ilist.h:393
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static dlist_node * dlist_next_node(dlist_head *head, dlist_node *node)
Definition ilist.h:537
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
Definition inval.c:823
void InvalidateSystemCaches(void)
Definition inval.c:916
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_sort(List *list, list_sort_comparator cmp)
Definition list.c:1674
void UpdateDecodingStats(LogicalDecodingContext *ctx)
Definition logical.c:1943
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1266
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define SLAB_DEFAULT_BLOCK_SIZE
Definition memutils.h:189
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static char * errmsg
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
pairingheap * pairingheap_allocate(pairingheap_comparator compare, void *arg)
Definition pairingheap.c:42
pairingheap_node * pairingheap_first(pairingheap *heap)
#define pairingheap_container(type, membername, ptr)
Definition pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition pairingheap.h:51
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define MAXPGPATH
const void * data
#define lfirst(lc)
Definition pg_list.h:172
#define NIL
Definition pg_list.h:68
#define sprintf
Definition port.h:262
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:242
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
static int32 DatumGetInt32(Datum X)
Definition postgres.h:202
#define InvalidOid
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1401
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsValid(relation)
Definition rel.h:489
Relation RelationIdGetRelation(Oid relationId)
Definition relcache.c:2088
void RelationClose(Relation relation)
Definition relcache.c:2209
Oid RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
ForkNumber
Definition relpath.h:56
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146
static int file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
void ReorderBufferFreeRelids(ReorderBuffer *rb, Oid *relids)
void ReorderBufferFreeChange(ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static void ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
static void ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferReplay(ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
static void ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
static ReorderBufferTXN * ReorderBufferLargestTXN(ReorderBuffer *rb)
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
static bool ReorderBufferCanStartStreaming(ReorderBuffer *rb)
static void ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
void ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
static int ReorderBufferIterCompare(Datum a, Datum b, void *arg)
static void ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
bool ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferFreeTupleBuf(HeapTuple tuple)
void ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
void ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, char *gid)
uint32 ReorderBufferGetInvalidations(ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
void ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
void ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
TransactionId * ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb)
static void ReorderBufferSaveTXNSnapshot(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
#define IsSpecInsert(action)
static Size ReorderBufferChangeSize(ReorderBufferChange *change)
int logical_decoding_work_mem
static void AssertChangeLsnOrder(ReorderBufferTXN *txn)
static bool ReorderBufferCanStream(ReorderBuffer *rb)
static int ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg)
static void ReorderBufferApplyChange(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
void ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid)
bool ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
void ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
void ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
void ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
int debug_logical_replication_streaming
void ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
#define IsInsertOrUpdate(action)
static void ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
void ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
static void ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
static void ReorderBufferIterTXNFinish(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
#define CHANGES_THRESHOLD
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN(ReorderBuffer *rb)
static bool ReorderBufferCheckAndTruncateAbortedTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
HeapTuple ReorderBufferAllocTupleBuf(ReorderBuffer *rb, Size tuple_len)
static void AssertTXNLsnOrder(ReorderBuffer *rb)
#define MAX_DISTR_INVAL_MSG_PER_TXN
static void ReorderBufferApplyMessage(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap)
static void ReorderBufferCleanupSerializedTXNs(const char *slotname)
ReorderBufferChange * ReorderBufferAllocChange(ReorderBuffer *rb)
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
static void SetupCheckXidLive(TransactionId xid)
static bool TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
static void ReorderBufferApplyTruncate(ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
static void ReorderBufferProcessPartialChange(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static void ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
static ReorderBufferTXN * ReorderBufferAllocTXN(ReorderBuffer *rb)
static void ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
static void ReorderBufferTransferSnapToParent(ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
static void ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
static ReorderBufferChange * ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
Oid * ReorderBufferAllocRelids(ReorderBuffer *rb, int nrelids)
static void ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
static void ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
static void ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
void ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
void ReorderBufferFree(ReorderBuffer *rb)
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
#define IsSpecConfirmOrAbort(action)
static void ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
static const Size max_changes_in_memory
void StartupReorderBuffer(void)
void ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
static ReorderBufferTXN * ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
static void ReorderBufferMaybeMarkTXNStreamed(ReorderBuffer *rb, ReorderBufferTXN *txn)
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
static void ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
#define rbtxn_is_committed(txn)
#define rbtxn_has_streamable_change(txn)
#define rbtxn_has_catalog_changes(txn)
@ DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE
@ DEBUG_LOGICAL_REP_STREAMING_BUFFERED
#define RBTXN_PREPARE_STATUS_MASK
#define rbtxn_is_serialized_clear(txn)
#define RBTXN_IS_STREAMED
#define rbtxn_is_prepared(txn)
#define RBTXN_HAS_PARTIAL_CHANGE
#define rbtxn_is_streamed(txn)
#define RBTXN_SENT_PREPARE
#define rbtxn_is_toptxn(txn)
#define rbtxn_get_toptxn(txn)
#define rbtxn_is_known_subxact(txn)
#define rbtxn_is_subtxn(txn)
#define RBTXN_HAS_CATALOG_CHANGES
#define RBTXN_IS_COMMITTED
#define PG_LOGICAL_MAPPINGS_DIR
#define RBTXN_DISTR_INVAL_OVERFLOWED
#define RBTXN_IS_SERIALIZED_CLEAR
#define rbtxn_sent_prepare(txn)
#define RBTXN_IS_PREPARED
#define rbtxn_distr_inval_overflowed(txn)
#define RBTXN_SKIPPED_PREPARE
#define RBTXN_HAS_STREAMABLE_CHANGE
@ REORDER_BUFFER_CHANGE_INVALIDATION
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
@ REORDER_BUFFER_CHANGE_MESSAGE
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT
@ REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID
@ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
@ REORDER_BUFFER_CHANGE_TRUNCATE
@ REORDER_BUFFER_CHANGE_DELETE
@ REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT
#define rbtxn_is_aborted(txn)
#define RBTXN_IS_SERIALIZED
#define rbtxn_is_serialized(txn)
#define RBTXN_IS_ABORTED
#define RBTXN_IS_SUBXACT
#define rbtxn_has_partial_change(txn)
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
#define LOGICAL_REWRITE_FORMAT
Definition rewriteheap.h:54
MemoryContext SlabContextCreate(MemoryContext parent, const char *name, Size blockSize, Size chunkSize)
Definition slab.c:322
ReplicationSlot * MyReplicationSlot
Definition slot.c:149
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
Definition slot.c:268
#define PG_REPLSLOT_DIR
Definition slot.h:21
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition snapbuild.c:331
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition snapbuild.c:307
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition snapbuild.c:280
@ SNAPBUILD_CONSISTENT
Definition snapbuild.h:50
void TeardownHistoricSnapshot(bool is_error)
Definition snapmgr.c:1685
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition snapmgr.c:1669
static HTAB * tuplecid_data
Definition snapmgr.c:163
#define free(a)
bool attisdropped
Definition tupdesc.h:78
Definition dirent.c:26
Size keysize
Definition hsearch.h:75
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
Definition pg_list.h:54
XLogReaderState * reader
Definition logical.h:42
struct SnapBuild * snapshot_builder
Definition logical.h:44
ItemPointerData new_tid
Definition rewriteheap.h:40
RelFileLocator old_locator
Definition rewriteheap.h:37
ItemPointerData old_tid
Definition rewriteheap.h:39
RelFileLocator new_locator
Definition rewriteheap.h:38
RelFileNumber relNumber
Form_pg_class rd_rel
Definition rel.h:111
ReorderBufferChangeType action
struct ReorderBufferChange::@117::@119 truncate
struct ReorderBufferChange::@117::@121 tuplecid
RelFileLocator rlocator
ItemPointerData tid
union ReorderBufferChange::@117 data
struct ReorderBufferChange::@117::@122 inval
struct ReorderBufferChange::@117::@120 msg
struct ReorderBufferTXN * txn
RelFileLocator locator
ReplOriginId origin_id
SharedInvalidationMessage * invalidations
struct ReorderBufferChange::@117::@118 tp
ReorderBufferChange change
ReorderBufferChange * change
ReorderBufferTXN * txn
XLogRecPtr restart_decoding_lsn
pairingheap_node txn_node
TimestampTz commit_time
XLogRecPtr base_snapshot_lsn
TransactionId toplevel_xid
dlist_node catchange_node
SharedInvalidationMessage * invalidations
dlist_head tuplecids
XLogRecPtr first_lsn
TimestampTz abort_time
XLogRecPtr final_lsn
void * output_plugin_private
uint32 ninvalidations_distributed
XLogRecPtr origin_lsn
TimestampTz prepare_time
TransactionId xid
ReplOriginId origin_id
dlist_node base_snapshot_node
SharedInvalidationMessage * invalidations_distributed
dlist_head txns_by_base_snapshot_lsn
MemoryContext context
dclist_head catchange_txns
MemoryContext change_context
ReorderBufferTXN * by_txn_last_txn
TransactionId by_txn_last_xid
MemoryContext tup_context
dlist_head toplevel_by_lsn
pairingheap * txn_heap
MemoryContext txn_context
XLogRecPtr current_restart_decoding_lsn
ReplicationSlotPersistentData data
Definition slot.h:213
char fname[MAXPGPATH]
TransactionId xmin
Definition snapshot.h:153
int32 subxcnt
Definition snapshot.h:177
CommandId curcid
Definition snapshot.h:183
uint32 xcnt
Definition snapshot.h:165
TransactionId * subxip
Definition snapshot.h:176
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
int32 va_rawsize
Definition varatt.h:34
Definition c.h:778
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:193
#define VARHDRSZ_SHORT
Definition varatt.h:278
static bool VARATT_IS_SHORT(const void *PTR)
Definition varatt.h:403
static void SET_VARSIZE_COMPRESSED(void *PTR, Size len)
Definition varatt.h:446
static bool VARATT_IS_EXTENDED(const void *PTR)
Definition varatt.h:410
static bool VARATT_IS_EXTERNAL(const void *PTR)
Definition varatt.h:354
static char * VARDATA_EXTERNAL(const void *PTR)
Definition varatt.h:340
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARTAG_EXTERNAL(void *PTR, vartag_external tag)
Definition varatt.h:453
static Size VARATT_EXTERNAL_GET_EXTSIZE(varatt_external toast_pointer)
Definition varatt.h:507
@ VARTAG_INDIRECT
Definition varatt.h:86
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static bool VARATT_EXTERNAL_IS_COMPRESSED(varatt_external toast_pointer)
Definition varatt.h:536
static Size VARSIZE_SHORT(const void *PTR)
Definition varatt.h:312
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
#define lstat(path, sb)
Definition win32_port.h:275
#define S_ISDIR(m)
Definition win32_port.h:315
bool IsTransactionOrTransactionBlock(void)
Definition xact.c:5012
void BeginInternalSubTransaction(const char *name)
Definition xact.c:4717
TransactionId CheckXidAlive
Definition xact.c:101
void RollbackAndReleaseCurrentSubTransaction(void)
Definition xact.c:4819
void StartTransactionCommand(void)
Definition xact.c:3081
TransactionId GetCurrentTransactionIdIfAny(void)
Definition xact.c:473
TransactionId GetCurrentTransactionId(void)
Definition xact.c:456
void AbortCurrentTransaction(void)
Definition xact.c:3473
int xidComparator(const void *arg1, const void *arg2)
Definition xid.c:152
int wal_segment_size
Definition xlog.c:147
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 ReplOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52

◆ IsSpecConfirmOrAbort

#define IsSpecConfirmOrAbort (   action)
Value:

Definition at line 202 of file reorderbuffer.c.

◆ IsSpecInsert

#define IsSpecInsert (   action)
Value:

Definition at line 198 of file reorderbuffer.c.

◆ MAX_DISTR_INVAL_MSG_PER_TXN

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))

Definition at line 126 of file reorderbuffer.c.

Typedef Documentation

◆ ReorderBufferDiskChange

◆ ReorderBufferIterTXNEntry

◆ ReorderBufferIterTXNState

◆ ReorderBufferToastEnt

◆ ReorderBufferTupleCidEnt

◆ ReorderBufferTupleCidKey

◆ ReorderBufferTXNByIdEnt

◆ RewriteMappingFile

◆ TXNEntryFile

Function Documentation

◆ ApplyLogicalMappingFile()

static void ApplyLogicalMappingFile ( HTAB tuplecid_data,
const char fname 
)
static

Definition at line 5362 of file reorderbuffer.c.

5363{
5364 char path[MAXPGPATH];
5365 int fd;
5366 int readBytes;
5368
5369 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5371 if (fd < 0)
5372 ereport(ERROR,
5374 errmsg("could not open file \"%s\": %m", path)));
5375
5376 while (true)
5377 {
5381 bool found;
5382
5383 /* be careful about padding */
5384 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5385
5386 /* read all mappings till the end of the file */
5388 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5390
5391 if (readBytes < 0)
5392 ereport(ERROR,
5394 errmsg("could not read file \"%s\": %m",
5395 path)));
5396 else if (readBytes == 0) /* EOF */
5397 break;
5398 else if (readBytes != sizeof(LogicalRewriteMappingData))
5399 ereport(ERROR,
5401 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5402 path, readBytes,
5403 (int32) sizeof(LogicalRewriteMappingData))));
5404
5405 key.rlocator = map.old_locator;
5407 &key.tid);
5408
5409
5412
5413 /* no existing mapping, no need to update */
5414 if (!ent)
5415 continue;
5416
5417 key.rlocator = map.new_locator;
5419 &key.tid);
5420
5422 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5423
5424 if (found)
5425 {
5426 /*
5427 * Make sure the existing mapping makes sense. We sometimes update
5428 * old records that did not yet have a cmax (e.g. pg_class' own
5429 * entry while rewriting it) during rewrites, so allow that.
5430 */
5431 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5432 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5433 }
5434 else
5435 {
5436 /* update mapping */
5437 new_ent->cmin = ent->cmin;
5438 new_ent->cmax = ent->cmax;
5439 new_ent->combocid = ent->combocid;
5440 }
5441 }
5442
5443 if (CloseTransientFile(fd) != 0)
5444 ereport(ERROR,
5446 errmsg("could not close file \"%s\": %m", path)));
5447}

References Assert, CloseTransientFile(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), HASH_ENTER, HASH_FIND, hash_search(), InvalidCommandId, ItemPointerCopy(), MAXPGPATH, LogicalRewriteMappingData::new_locator, LogicalRewriteMappingData::new_tid, LogicalRewriteMappingData::old_locator, LogicalRewriteMappingData::old_tid, OpenTransientFile(), PG_BINARY, PG_LOGICAL_MAPPINGS_DIR, pgstat_report_wait_end(), pgstat_report_wait_start(), read, sprintf, and tuplecid_data.

Referenced by UpdateLogicalMappings().

◆ AssertChangeLsnOrder()

static void AssertChangeLsnOrder ( ReorderBufferTXN txn)
static

Definition at line 1014 of file reorderbuffer.c.

1015{
1016#ifdef USE_ASSERT_CHECKING
1017 dlist_iter iter;
1019
1020 dlist_foreach(iter, &txn->changes)
1021 {
1023
1025
1028 Assert(txn->first_lsn <= cur_change->lsn);
1029
1030 if (XLogRecPtrIsValid(txn->end_lsn))
1031 Assert(cur_change->lsn <= txn->end_lsn);
1032
1034
1035 prev_lsn = cur_change->lsn;
1036 }
1037#endif
1038}

References Assert, ReorderBufferTXN::changes, dlist_iter::cur, dlist_container, dlist_foreach, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::first_lsn, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit().

◆ AssertTXNLsnOrder()

static void AssertTXNLsnOrder ( ReorderBuffer rb)
static

Definition at line 943 of file reorderbuffer.c.

944{
945#ifdef USE_ASSERT_CHECKING
946 LogicalDecodingContext *ctx = rb->private_data;
947 dlist_iter iter;
950
951 /*
952 * Skip the verification if we have not yet reached the LSN at which we start
953 * decoding the contents of transactions, because until we reach the
954 * LSN, we could have transactions that don't have the association between
955 * the top-level transaction and subtransaction yet and consequently have
956 * the same LSN. We don't guarantee this association until we try to
957 * decode the actual contents of transaction. The ordering of the records
958 * prior to the start_decoding_at LSN should have been checked before the
959 * restart.
960 */
962 return;
963
964 dlist_foreach(iter, &rb->toplevel_by_lsn)
965 {
967 iter.cur);
968
969 /* start LSN must be set */
970 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
971
972 /* If there is an end LSN, it must be higher than start LSN */
973 if (XLogRecPtrIsValid(cur_txn->end_lsn))
974 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
975
976 /* Current initial LSN must be strictly higher than previous */
979
980 /* known-as-subtxn txns must not be listed */
982
983 prev_first_lsn = cur_txn->first_lsn;
984 }
985
986 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
987 {
989 base_snapshot_node,
990 iter.cur);
991
992 /* base snapshot (and its LSN) must be set */
993 Assert(cur_txn->base_snapshot != NULL);
994 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
995
996 /* current LSN must be strictly higher than previous */
998 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
999
1000 /* known-as-subtxn txns must not be listed */
1002
1003 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1004 }
1005#endif
1006}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, XLogReaderState::EndRecPtr, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, LogicalDecodingContext::reader, SnapBuildXactNeedsSkip(), LogicalDecodingContext::snapshot_builder, and XLogRecPtrIsValid.

Referenced by ReorderBufferAssignChild(), ReorderBufferGetOldestTXN(), ReorderBufferGetOldestXmin(), ReorderBufferSetBaseSnapshot(), and ReorderBufferTXNByXid().

◆ file_sort_by_lsn()

static int file_sort_by_lsn ( const ListCell a_p,
const ListCell b_p 
)
static

Definition at line 5464 of file reorderbuffer.c.

5465{
5468
5469 return pg_cmp_u64(a->lsn, b->lsn);
5470}

References a, b, fb(), lfirst, and pg_cmp_u64().

Referenced by UpdateLogicalMappings().

◆ ReorderBufferAbort()

void ReorderBufferAbort ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
TimestampTz  abort_time 
)

Definition at line 3085 of file reorderbuffer.c.

3087{
3088 ReorderBufferTXN *txn;
3089
3090 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3091 false);
3092
3093 /* unknown, nothing to remove */
3094 if (txn == NULL)
3095 return;
3096
3097 txn->abort_time = abort_time;
3098
3099 /* For streamed transactions notify the remote node about the abort. */
3100 if (rbtxn_is_streamed(txn))
3101 {
3102 rb->stream_abort(rb, txn, lsn);
3103
3104 /*
3105 * We might have decoded changes for this transaction that could load
3106 * the cache as per the current transaction's view (consider DDLs that
3107 * happened in this transaction). We don't want the decoding of future
3108 * transactions to use those cache entries so execute only the inval
3109 * messages in this transaction.
3110 */
3111 if (txn->ninvalidations > 0)
3113 txn->invalidations);
3114 }
3115
3116 /* cosmetic... */
3117 txn->final_lsn = lsn;
3118
3119 /* remove potential on-disk data, and deallocate */
3121}

References ReorderBufferTXN::abort_time, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeAbort().

◆ ReorderBufferAbortOld()

void ReorderBufferAbortOld ( ReorderBuffer rb,
TransactionId  oldestRunningXid 
)

Definition at line 3131 of file reorderbuffer.c.

3132{
3134
3135 /*
3136 * Iterate through all (potential) toplevel TXNs and abort all that are
3137 * older than what can possibly be running. Once we've found the first
3138 * that is alive we stop; there might be some that acquired an xid earlier
3139 * but started writing later, but it's unlikely and they will be cleaned
3140 * up in a later call to this function.
3141 */
3142 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3143 {
3144 ReorderBufferTXN *txn;
3145
3146 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3147
3148 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3149 {
3150 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3151
3152 /* Notify the remote node about the crash/immediate restart. */
3153 if (rbtxn_is_streamed(txn))
3154 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3155
3156 /* remove potential on-disk data, and deallocate this tx */
3158 }
3159 else
3160 return;
3161 }
3162}

References DEBUG2, dlist_container, dlist_foreach_modify, elog, fb(), InvalidXLogRecPtr, rbtxn_is_streamed, ReorderBufferCleanupTXN(), TransactionIdPrecedes(), and ReorderBufferTXN::xid.

Referenced by standby_decode().

◆ ReorderBufferAccumulateInvalidations()

static void ReorderBufferAccumulateInvalidations ( SharedInvalidationMessage **  invals_out,
uint32 ninvals_out,
SharedInvalidationMessage msgs_new,
Size  nmsgs_new 
)
static

Definition at line 3503 of file reorderbuffer.c.

3507{
3508 if (*ninvals_out == 0)
3509 {
3513 }
3514 else
3515 {
3516 /* Enlarge the array of inval messages */
3517 *invals_out =
3519 (*ninvals_out + nmsgs_new));
3523 }
3524}

References fb(), palloc_array, and repalloc_array.

Referenced by ReorderBufferAddDistributedInvalidations(), and ReorderBufferAddInvalidations().

◆ ReorderBufferAddDistributedInvalidations()

void ReorderBufferAddDistributedInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3581 of file reorderbuffer.c.

3584{
3585 ReorderBufferTXN *txn;
3586 MemoryContext oldcontext;
3587
3588 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3589
3590 oldcontext = MemoryContextSwitchTo(rb->context);
3591
3592 /*
3593 * Collect all the invalidations under the top transaction, if available,
3594 * so that we can execute them all together. See comments
3595 * atop ReorderBufferAddInvalidations.
3596 */
3597 txn = rbtxn_get_toptxn(txn);
3598
3599 Assert(nmsgs > 0);
3600
3602 {
3603 /*
3604 * Check the transaction has enough space for storing distributed
3605 * invalidation messages.
3606 */
3608 {
3609 /*
3610 * Mark the invalidation message as overflowed and free up the
3611 * messages accumulated so far.
3612 */
3614
3616 {
3620 }
3621 }
3622 else
3625 msgs, nmsgs);
3626 }
3627
3628 /* Queue the invalidation messages into the transaction */
3629 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3630
3631 MemoryContextSwitchTo(oldcontext);
3632}

References Assert, fb(), ReorderBufferTXN::invalidations_distributed, MAX_DISTR_INVAL_MSG_PER_TXN, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations_distributed, pfree(), RBTXN_DISTR_INVAL_OVERFLOWED, rbtxn_distr_inval_overflowed, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferAddInvalidations()

void ReorderBufferAddInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3540 of file reorderbuffer.c.

3543{
3544 ReorderBufferTXN *txn;
3545 MemoryContext oldcontext;
3546
3547 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3548
3549 oldcontext = MemoryContextSwitchTo(rb->context);
3550
3551 /*
3552 * Collect all the invalidations under the top transaction, if available,
3553 * so that we can execute them all together. See comments atop this
3554 * function.
3555 */
3556 txn = rbtxn_get_toptxn(txn);
3557
3558 Assert(nmsgs > 0);
3559
3561 &txn->ninvalidations,
3562 msgs, nmsgs);
3563
3564 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3565
3566 MemoryContextSwitchTo(oldcontext);
3567}

References Assert, fb(), ReorderBufferTXN::invalidations, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), and ReorderBufferTXNByXid().

Referenced by xact_decode().

◆ ReorderBufferAddNewCommandId()

◆ ReorderBufferAddNewTupleCids()

◆ ReorderBufferAddSnapshot()

◆ ReorderBufferAllocate()

ReorderBuffer * ReorderBufferAllocate ( void  )

Definition at line 325 of file reorderbuffer.c.

326{
327 ReorderBuffer *buffer;
330
332
333 /* allocate memory in own context, to have better accountability */
335 "ReorderBuffer",
337
338 buffer =
340
341 memset(&hash_ctl, 0, sizeof(hash_ctl));
342
343 buffer->context = new_ctx;
344
346 "Change",
348 sizeof(ReorderBufferChange));
349
351 "TXN",
353 sizeof(ReorderBufferTXN));
354
355 /*
356 * To minimize memory fragmentation caused by long-running transactions
357 * with changes spanning multiple memory blocks, we use a single
358 * fixed-size memory block for decoded tuple storage. The performance
359 * testing showed that the default memory block size maintains logical
360 * decoding performance without causing fragmentation due to concurrent
361 * transactions. One might think that we can use the max size as
362 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
363 * the memory fragmentation.
364 */
366 "Tuples",
370
371 hash_ctl.keysize = sizeof(TransactionId);
372 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
373 hash_ctl.hcxt = buffer->context;
374
375 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
377
379 buffer->by_txn_last_txn = NULL;
380
381 buffer->outbuf = NULL;
382 buffer->outbufsize = 0;
383 buffer->size = 0;
384
385 /* txn_heap is ordered by transaction size */
387
388 buffer->spillTxns = 0;
389 buffer->spillCount = 0;
390 buffer->spillBytes = 0;
391 buffer->streamTxns = 0;
392 buffer->streamCount = 0;
393 buffer->streamBytes = 0;
394 buffer->memExceededCount = 0;
395 buffer->totalTxns = 0;
396 buffer->totalBytes = 0;
397
399
400 dlist_init(&buffer->toplevel_by_lsn);
402 dclist_init(&buffer->catchange_txns);
403
404 /*
405 * Ensure there's no stale data from prior uses of this slot, in case some
406 * prior exit avoided calling ReorderBufferFree. Failure to do this can
407 * produce duplicated txns, and it's very cheap if there's nothing there.
408 */
410
411 return buffer;
412}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ReorderBuffer::by_txn, ReorderBuffer::by_txn_last_txn, ReorderBuffer::by_txn_last_xid, ReorderBuffer::catchange_txns, ReorderBuffer::change_context, ReorderBuffer::context, ReorderBuffer::current_restart_decoding_lsn, CurrentMemoryContext, ReplicationSlot::data, dclist_init(), dlist_init(), fb(), GenerationContextCreate(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, InvalidTransactionId, InvalidXLogRecPtr, ReorderBuffer::memExceededCount, MemoryContextAlloc(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, ReorderBuffer::outbuf, ReorderBuffer::outbufsize, pairingheap_allocate(), ReorderBufferCleanupSerializedTXNs(), ReorderBufferTXNSizeCompare(), ReorderBuffer::size, SLAB_DEFAULT_BLOCK_SIZE, SlabContextCreate(), ReorderBuffer::spillBytes, ReorderBuffer::spillCount, ReorderBuffer::spillTxns, ReorderBuffer::streamBytes, ReorderBuffer::streamCount, ReorderBuffer::streamTxns, ReorderBuffer::toplevel_by_lsn, ReorderBuffer::totalBytes, ReorderBuffer::totalTxns, ReorderBuffer::tup_context, ReorderBuffer::txn_context, ReorderBuffer::txn_heap, and ReorderBuffer::txns_by_base_snapshot_lsn.

Referenced by StartupDecodingContext().

◆ ReorderBufferAllocChange()

◆ ReorderBufferAllocRelids()

Oid * ReorderBufferAllocRelids ( ReorderBuffer rb,
int  nrelids 
)

Definition at line 626 of file reorderbuffer.c.

627{
628 Oid *relids;
630
631 alloc_len = sizeof(Oid) * nrelids;
632
633 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
634
635 return relids;
636}

References fb(), and MemoryContextAlloc().

Referenced by DecodeTruncate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTupleBuf()

HeapTuple ReorderBufferAllocTupleBuf ( ReorderBuffer rb,
Size  tuple_len 
)

Definition at line 593 of file reorderbuffer.c.

594{
595 HeapTuple tuple;
597
598 alloc_len = tuple_len + SizeofHeapTupleHeader;
599
600 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
602 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
603
604 return tuple;
605}

References fb(), HEAPTUPLESIZE, MemoryContextAlloc(), SizeofHeapTupleHeader, and HeapTupleData::t_data.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeUpdate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTXN()

static ReorderBufferTXN * ReorderBufferAllocTXN ( ReorderBuffer rb)
static

Definition at line 436 of file reorderbuffer.c.

437{
438 ReorderBufferTXN *txn;
439
440 txn = (ReorderBufferTXN *)
441 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
442
443 memset(txn, 0, sizeof(ReorderBufferTXN));
444
445 dlist_init(&txn->changes);
446 dlist_init(&txn->tuplecids);
447 dlist_init(&txn->subtxns);
448
449 /* InvalidCommandId is not zero, so set it explicitly */
452
453 return txn;
454}

References ReorderBufferTXN::changes, ReorderBufferTXN::command_id, dlist_init(), fb(), InvalidCommandId, MemoryContextAlloc(), ReorderBufferTXN::output_plugin_private, ReorderBufferTXN::subtxns, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferTXNByXid().

◆ ReorderBufferApplyChange()

static void ReorderBufferApplyChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
Relation  relation,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2073 of file reorderbuffer.c.

2076{
2077 if (streaming)
2078 rb->stream_change(rb, txn, relation, change);
2079 else
2080 rb->apply_change(rb, txn, relation, change);
2081}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyMessage()

static void ReorderBufferApplyMessage ( ReorderBuffer rb,
ReorderBufferTXN txn,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2101 of file reorderbuffer.c.

2103{
2104 if (streaming)
2105 rb->stream_message(rb, txn, change->lsn, true,
2106 change->data.msg.prefix,
2107 change->data.msg.message_size,
2108 change->data.msg.message);
2109 else
2110 rb->message(rb, txn, change->lsn, true,
2111 change->data.msg.prefix,
2112 change->data.msg.message_size,
2113 change->data.msg.message);
2114}

References ReorderBufferChange::data, fb(), ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, and ReorderBufferChange::prefix.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyTruncate()

static void ReorderBufferApplyTruncate ( ReorderBuffer rb,
ReorderBufferTXN txn,
int  nrelations,
Relation relations,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2087 of file reorderbuffer.c.

2090{
2091 if (streaming)
2092 rb->stream_truncate(rb, txn, nrelations, relations, change);
2093 else
2094 rb->apply_truncate(rb, txn, nrelations, relations, change);
2095}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferAssignChild()

void ReorderBufferAssignChild ( ReorderBuffer rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  lsn 
)

Definition at line 1100 of file reorderbuffer.c.

1102{
1103 ReorderBufferTXN *txn;
1104 ReorderBufferTXN *subtxn;
1105 bool new_top;
1106 bool new_sub;
1107
1108 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1109 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1110
1111 if (!new_sub)
1112 {
1113 if (rbtxn_is_known_subxact(subtxn))
1114 {
1115 /* already associated, nothing to do */
1116 return;
1117 }
1118 else
1119 {
1120 /*
1121 * We already saw this transaction, but initially added it to the
1122 * list of top-level txns. Now that we know it's not top-level,
1123 * remove it from there.
1124 */
1125 dlist_delete(&subtxn->node);
1126 }
1127 }
1128
1129 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1130 subtxn->toplevel_xid = xid;
1131 Assert(subtxn->nsubtxns == 0);
1132
1133 /* set the reference to top-level transaction */
1134 subtxn->toptxn = txn;
1135
1136 /* add to subtransaction list */
1137 dlist_push_tail(&txn->subtxns, &subtxn->node);
1138 txn->nsubtxns++;
1139
1140 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1141 ReorderBufferTransferSnapToParent(txn, subtxn);
1142
1143 /* Verify LSN-ordering invariant */
1144 AssertTXNLsnOrder(rb);
1145}

References Assert, AssertTXNLsnOrder(), dlist_delete(), dlist_push_tail(), fb(), ReorderBufferTXN::nsubtxns, rbtxn_is_known_subxact, RBTXN_IS_SUBXACT, ReorderBufferTransferSnapToParent(), ReorderBufferTXNByXid(), and ReorderBufferTXN::subtxns.

Referenced by LogicalDecodingProcessRecord(), and ReorderBufferCommitChild().

◆ ReorderBufferBuildTupleCidHash()

static void ReorderBufferBuildTupleCidHash ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1837 of file reorderbuffer.c.

1838{
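1839 dlist_iter iter;
1840 HASHCTL hash_ctl;
1841
1842 if (!rbtxn_has_catalog_changes(txn) || dlist_is_empty(&txn->tuplecids))
1843 return;
1844
1845 hash_ctl.keysize = sizeof(ReorderBufferTupleCidKey);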
1846 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1847 hash_ctl.hcxt = rb->context;
1848
1849 /*
1850 * create the hash with the exact number of to-be-stored tuplecids from
1851 * the start
1852 */
1853 txn->tuplecid_hash =
1854 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1855 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1856
1857 dlist_foreach(iter, &txn->tuplecids)
1858 {
1859 ReorderBufferTupleCidKey key;
1860 ReorderBufferTupleCidEnt *ent;
1861 bool found;
1862 ReorderBufferChange *change;
1863
1864 change = dlist_container(ReorderBufferChange, node, iter.cur);
1865
1866 Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
1865
1867
1868 /* be careful about padding */
1869 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1870
1871 key.rlocator = change->data.tuplecid.locator;
1872
1873 ItemPointerCopy(&change->data.tuplecid.tid,
1874 &key.tid);
1875
1876 ent = (ReorderBufferTupleCidEnt *)
1877 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1878 if (!found)
1879 {
1880 ent->cmin = change->data.tuplecid.cmin;
1881 ent->cmax = change->data.tuplecid.cmax;
1882 ent->combocid = change->data.tuplecid.combocid;
1883 }
1884 else
1885 {
1886 /*
1887 * Maybe we already saw this tuple before in this transaction, but
1888 * if so it must have the same cmin.
1889 */
1890 Assert(ent->cmin == change->data.tuplecid.cmin);
1891
1892 /*
1893 * cmax may be initially invalid, but once set it can only grow,
1894 * and never become invalid again.
1895 */
1896 Assert((ent->cmax == InvalidCommandId) ||
1897 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1898 (change->data.tuplecid.cmax > ent->cmax)));
1899 ent->cmax = change->data.tuplecid.cmax;
1900 }
1901 }
1902}

References ReorderBufferChange::action, Assert, ReorderBufferChange::cmax, ReorderBufferChange::cmin, ReorderBufferChange::combocid, dlist_iter::cur, ReorderBufferChange::data, dlist_container, dlist_foreach, dlist_is_empty(), fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), InvalidCommandId, ItemPointerCopy(), HASHCTL::keysize, ReorderBufferChange::locator, ReorderBufferTXN::ntuplecids, rbtxn_has_catalog_changes, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChange::tid, ReorderBufferChange::tuplecid, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferCanStartStreaming()

static bool ReorderBufferCanStartStreaming ( ReorderBuffer rb)
inlinestatic

Definition at line 4314 of file reorderbuffer.c.

4315{
4316 LogicalDecodingContext *ctx = rb->private_data;
4317 SnapBuild *builder = ctx->snapshot_builder;
4318
4319 /* We can't start streaming unless a consistent state is reached. */
4320 if (SnapBuildCurrentState(builder) < SNAPBUILD_CONSISTENT)
4321 return false;
4322
4323 /*
4324 * We can't start streaming immediately even if the streaming is enabled
4325 * because we previously decoded this transaction and now just are
4326 * restarting.
4327 */
4328 if (ReorderBufferCanStream(rb) &&
4329 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4330 return true;
4331
4332 return false;
4333}

References fb(), LogicalDecodingContext::reader, XLogReaderState::ReadRecPtr, ReorderBufferCanStream(), SNAPBUILD_CONSISTENT, SnapBuildCurrentState(), SnapBuildXactNeedsSkip(), and LogicalDecodingContext::snapshot_builder.

Referenced by ReorderBufferCheckMemoryLimit(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferCanStream()

static bool ReorderBufferCanStream ( ReorderBuffer rb)
inlinestatic

Definition at line 4305 of file reorderbuffer.c.

4306{
4307 LogicalDecodingContext *ctx = rb->private_data;
4308
4309 return ctx->streaming;
4310}

References fb(), and LogicalDecodingContext::streaming.

Referenced by ReorderBufferCanStartStreaming(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferChangeMemoryUpdate()

static void ReorderBufferChangeMemoryUpdate ( ReorderBuffer rb,
ReorderBufferChange change,
ReorderBufferTXN txn,
bool  addition,
Size  sz 
)
static

Definition at line 3383 of file reorderbuffer.c.

3387{
3388 ReorderBufferTXN *toptxn;
3389
3390 Assert(txn || change);
3391
3392 /*
3393 * Ignore tuple CID changes, because those are not evicted when reaching
3394 * memory limit. So we just don't count them, because it might easily
3395 * trigger a pointless attempt to spill.
3396 */
3397 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3398 return;
3399
3400 if (sz == 0)
3401 return;
3402
3403 if (txn == NULL)
3404 txn = change->txn;
3405 Assert(txn != NULL);
3406
3407 /*
3408 * Update the total size in top level as well. This is later used to
3409 * compute the decoding stats.
3410 */
3411 toptxn = rbtxn_get_toptxn(txn);
3412
3413 if (addition)
3414 {
3415 Size oldsize = txn->size;
3416
3417 txn->size += sz;
3418 rb->size += sz;
3419
3420 /* Update the total size in the top transaction. */
3421 toptxn->total_size += sz;
3422
3423 /* Update the max-heap */
3424 if (oldsize != 0)
3425 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3426 pairingheap_add(rb->txn_heap, &txn->txn_node);
3427 }
3428 else
3429 {
3430 Assert((rb->size >= sz) && (txn->size >= sz));
3431 txn->size -= sz;
3432 rb->size -= sz;
3433
3434 /* Update the total size in the top transaction. */
3435 toptxn->total_size -= sz;
3436
3437 /* Update the max-heap */
3438 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3439 if (txn->size != 0)
3440 pairingheap_add(rb->txn_heap, &txn->txn_node);
3441 }
3442
3443 Assert(txn->size <= rb->size);
3444}

References ReorderBufferChange::action, Assert, fb(), pairingheap_add(), pairingheap_remove(), rbtxn_get_toptxn, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferTXN::size, ReorderBufferTXN::total_size, ReorderBufferChange::txn, and ReorderBufferTXN::txn_node.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferSerializeTXN(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferChangeSize()

static Size ReorderBufferChangeSize ( ReorderBufferChange change)
static

Definition at line 4457 of file reorderbuffer.c.

4458{
4459 Size sz = sizeof(ReorderBufferChange);
4460
4461 switch (change->action)
4462 {
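4463 /* fall through these, they're all similar enough */
4464 case REORDER_BUFFER_CHANGE_INSERT:
4465 case REORDER_BUFFER_CHANGE_UPDATE:
4466 case REORDER_BUFFER_CHANGE_DELETE:
4467 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
4468 {
4469 HeapTuple oldtup,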
4470 newtup;
4471 Size oldlen = 0;
4472 Size newlen = 0;
4473
4474 oldtup = change->data.tp.oldtuple;
4475 newtup = change->data.tp.newtuple;
4476
4477 if (oldtup)
4478 {
4479 sz += sizeof(HeapTupleData);
4480 oldlen = oldtup->t_len;
4481 sz += oldlen;
4482 }
4483
4484 if (newtup)
4485 {
4486 sz += sizeof(HeapTupleData);
4487 newlen = newtup->t_len;
4488 sz += newlen;
4489 }
4490
4491 break;
4492 }
4493 case REORDER_BUFFER_CHANGE_MESSAGE:
4494 {
4495 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4496
4497 sz += prefix_size + change->data.msg.message_size +
4498 sizeof(Size) + sizeof(Size);
4499
4500 break;
4501 }
4502 case REORDER_BUFFER_CHANGE_INVALIDATION:
4503 {
4504 sz += sizeof(SharedInvalidationMessage) *
4505 change->data.inval.ninvalidations;
4506 break;
4507 }
4508 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
4509 {
4510 Snapshot snap;
4511
4512 snap = change->data.snapshot;
4513
4514 sz += sizeof(SnapshotData) +
4515 sizeof(TransactionId) * snap->xcnt +
4516 sizeof(TransactionId) * snap->subxcnt;
4517
4518 break;
4519 }
4520 case REORDER_BUFFER_CHANGE_TRUNCATE:
4521 {
4522 sz += sizeof(Oid) * change->data.truncate.nrelids;
4523
4524 break;
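4525 }
4526 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
4527 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
4528 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
4529 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: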
4530 /* ReorderBufferChange contains everything important */
4531 break;
4532 }
4533
4534 return sz;
4535}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferCheckAndTruncateAbortedTXN()

static bool ReorderBufferCheckAndTruncateAbortedTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1775 of file reorderbuffer.c.

1776{
1777 /* Quick return for regression tests */
1778 if (unlikely(debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE))
1779 return false;
1780
1781 /*
1782 * Quick return if the transaction status is already known.
1783 */
1784
1785 if (rbtxn_is_committed(txn))
1786 return false;
1787 if (rbtxn_is_aborted(txn))
1788 {
1789 /* Already-aborted transactions should not have any changes */
1790 Assert(txn->size == 0);
1791
1792 return true;
1793 }
1794
1795 /* Otherwise, check the transaction status using CLOG lookup */
1796
1797 if (TransactionIdIsInProgress(txn->xid))
1798 return false;
1799
1800 if (TransactionIdDidCommit(txn->xid))
1801 {
1802 /*
1803 * Remember the transaction is committed so that we can skip CLOG
1804 * check next time, avoiding the pressure on CLOG lookup.
1805 */
1806 Assert(!rbtxn_is_aborted(txn));
1807 txn->txn_flags |= RBTXN_IS_COMMITTED;
1808 return false;
1809 }
1810
1811 /*
1812 * The transaction aborted. We discard both the changes collected so far
1813 * and the toast reconstruction data. The full cleanup will happen as part
1814 * of decoding ABORT record of this transaction.
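1815 */
1816 ReorderBufferTruncateTXN(rb, txn, rbtxn_is_prepared(txn));
1817 ReorderBufferToastReset(rb, txn);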
1818
1819 /* All changes should be discarded */
1820 Assert(txn->size == 0);
1821
1822 /*
1823 * Mark the transaction as aborted so we can ignore future changes of this
1824 * transaction.
1825 */
1826 Assert(!rbtxn_is_committed(txn));
1827 txn->txn_flags |= RBTXN_IS_ABORTED;
1828
1829 return true;
1830}

References Assert, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), RBTXN_IS_ABORTED, rbtxn_is_aborted, RBTXN_IS_COMMITTED, rbtxn_is_committed, rbtxn_is_prepared, ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferTXN::size, TransactionIdDidCommit(), TransactionIdIsInProgress(), ReorderBufferTXN::txn_flags, unlikely, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferCheckMemoryLimit()

static void ReorderBufferCheckMemoryLimit ( ReorderBuffer rb)
static

Definition at line 3894 of file reorderbuffer.c.

3895{
3896 ReorderBufferTXN *txn;
3897 bool update_stats = true;
3898
3899 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3900 {
3901 /*
3902 * Update the statistics as the memory usage has reached the limit. We
3903 * report the statistics update later in this function since we can
3904 * update the slot statistics altogether while streaming or
3905 * serializing transactions in most cases.
3906 */
3907 rb->memExceededCount += 1;
3908 }
3909 else if (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_BUFFERED)
3910 {
3911 /*
3912 * Bail out if debug_logical_replication_streaming is buffered and we
3913 * haven't exceeded the memory limit.
3914 */
3915 return;
3916 }
3917
3918 /*
3919 * If debug_logical_replication_streaming is immediate, loop until there's
3920 * no change. Otherwise, loop until we reach under the memory limit. One
3921 * might think that just by evicting the largest (sub)transaction we will
3922 * come under the memory limit based on assumption that the selected
3923 * transaction is at least as large as the most recent change (which
3924 * caused us to go over the memory limit). However, that is not true
3925 * because a user can reduce the logical_decoding_work_mem to a smaller
3926 * value before the most recent change.
3927 */
3928 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3929 (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE &&
3930 rb->size > 0))
3931 {
3932 /*
3933 * Pick the largest non-aborted transaction and evict it from memory
3934 * by streaming, if possible. Otherwise, spill to disk.
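3935 */
3936 if (ReorderBufferCanStartStreaming(rb) &&
3937 (txn = ReorderBufferLargestStreamableTopTXN(rb)) != NULL)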
3938 {
3939 /* we know there has to be one, because the size is not zero */
3940 Assert(txn && rbtxn_is_toptxn(txn));
3941 Assert(txn->total_size > 0);
3942 Assert(rb->size >= txn->total_size);
3943
3944 /* skip the transaction if aborted */
3945 if (ReorderBufferCheckAndTruncateAbortedTXN(rb, txn))
3946 continue;
3947
3948 ReorderBufferStreamTXN(rb, txn);
3949 }
3950 else
3951 {
3952 /*
3953 * Pick the largest transaction (or subtransaction) and evict it
3954 * from memory by serializing it to disk.
3955 */
3956 txn = ReorderBufferLargestTXN(rb);
3957
3958 /* we know there has to be one, because the size is not zero */
3959 Assert(txn);
3960 Assert(txn->size > 0);
3961 Assert(rb->size >= txn->size);
3962
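3963 /* skip the transaction if aborted */
3964 if (ReorderBufferCheckAndTruncateAbortedTXN(rb, txn))
3965 continue;
3966
3967 ReorderBufferSerializeTXN(rb, txn);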
3968 }
3969
3970 /*
3971 * After eviction, the transaction should have no entries in memory,
3972 * and should use 0 bytes for changes.
3973 */
3974 Assert(txn->size == 0);
3975 Assert(txn->nentries_mem == 0);
3976
3977 /*
3978 * We've reported the memExceededCount update while streaming or
3979 * serializing the transaction.
3980 */
3981 update_stats = false;
3982 }
3983
3984 if (update_stats)
3985 UpdateDecodingStats((LogicalDecodingContext *) rb->private_data);
3986
3987 /* We must be under the memory limit now. */
3988 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3989}

References Assert, DEBUG_LOGICAL_REP_STREAMING_BUFFERED, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), logical_decoding_work_mem, ReorderBufferTXN::nentries_mem, rbtxn_is_toptxn, ReorderBufferCanStartStreaming(), ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferLargestStreamableTopTXN(), ReorderBufferLargestTXN(), ReorderBufferSerializeTXN(), ReorderBufferStreamTXN(), ReorderBufferTXN::size, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferCleanupSerializedTXNs()

static void ReorderBufferCleanupSerializedTXNs ( const char slotname)
static

Definition at line 4882 of file reorderbuffer.c.

4883{
4884 DIR *spill_dir;
4885 struct dirent *spill_de;
4886 struct stat statbuf;
4887 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4888
4889 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4890
4891 /* we're only handling directories here, skip if it's not ours */
4892 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4893 return;
4894
4895 spill_dir = AllocateDir(path);
4896 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4897 {
4898 /* only look at names that can be ours */
4899 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4900 {
4901 snprintf(path, sizeof(path),
4902 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4903 spill_de->d_name);
4904
4905 if (unlink(path) != 0)
4906 ereport(ERROR,
4907 (errcode_for_file_access(),
4908 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4909 path, PG_REPLSLOT_DIR, slotname)));
4910 }
4911 }
4912 FreeDir(spill_dir);
4913}

References AllocateDir(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), FreeDir(), INFO, lstat, MAXPGPATH, PG_REPLSLOT_DIR, ReadDirExtended(), S_ISDIR, snprintf, and sprintf.

Referenced by ReorderBufferAllocate(), ReorderBufferFree(), and StartupReorderBuffer().

◆ ReorderBufferCleanupTXN()

static void ReorderBufferCleanupTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1536 of file reorderbuffer.c.

1537{
1538 bool found;
1539 dlist_mutable_iter iter;
1540 Size mem_freed = 0;
1541
1542 /* cleanup subtransactions & their changes */
1543 dlist_foreach_modify(iter, &txn->subtxns)
1544 {
1545 ReorderBufferTXN *subtxn;
1546
1547 subtxn = dlist_container(ReorderBufferTXN, node, iter.cur);
1548
1549 /*
1550 * Subtransactions are always associated to the toplevel TXN, even if
1551 * they originally were happening inside another subtxn, so we won't
1552 * ever recurse more than one level deep here.
1553 */
1554 Assert(rbtxn_is_known_subxact(subtxn));
1555 Assert(subtxn->nsubtxns == 0);
1556
1557 ReorderBufferCleanupTXN(rb, subtxn);
1558 }
1559
1560 /* cleanup changes in the txn */
1561 dlist_foreach_modify(iter, &txn->changes)
1562 {
1563 ReorderBufferChange *change;
1564
1565 change = dlist_container(ReorderBufferChange, node, iter.cur);
1566
1567 /* Check we're not mixing changes from different transactions. */
1568 Assert(change->txn == txn);
1569
1570 /*
1571 * Instead of updating the memory counter for individual changes, we
1572 * sum up the size of memory to free so we can update the memory
1573 * counter all together below. This saves costs of maintaining the
1574 * max-heap.
1575 */
1576 mem_freed += ReorderBufferChangeSize(change);
1577
1578 ReorderBufferFreeChange(rb, change, false);
1579 }
1580
1581 /* Update the memory counter */
1582 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, mem_freed);
1583
1584 /*
1585 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1586 * They are always stored in the toplevel transaction.
1587 */
1588 dlist_foreach_modify(iter, &txn->tuplecids)
1589 {
1590 ReorderBufferChange *change;
1591
1592 change = dlist_container(ReorderBufferChange, node, iter.cur);
1593
1594 /* Check we're not mixing changes from different transactions. */
1595 Assert(change->txn == txn);
1596 Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
1597
1598 ReorderBufferFreeChange(rb, change, true);
1599 }
1600
1601 /*
1602 * Cleanup the base snapshot, if set.
1603 */
1604 if (txn->base_snapshot != NULL)
1605 {
1606 SnapBuildSnapDecRefcount(txn->base_snapshot);
1607 dlist_delete(&txn->base_snapshot_node);
1608 }
1609
1610 /*
1611 * Cleanup the snapshot for the last streamed run.
1612 */
1613 if (txn->snapshot_now != NULL)
1614 {
1615 Assert(rbtxn_is_streamed(txn));
1616 ReorderBufferFreeSnap(rb, txn->snapshot_now);
1617 }
1618
1619 /*
1620 * Remove TXN from its containing lists.
1621 *
1622 * Note: if txn is known as subxact, we are deleting the TXN from its
1623 * parent's list of known subxacts; this leaves the parent's nsubxacts
1624 * count too high, but we don't care. Otherwise, we are deleting the TXN
1625 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1626 * list of catalog modifying transactions as well.
1627 */
1628 dlist_delete(&txn->node);
1629 if (rbtxn_has_catalog_changes(txn))
1630 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1631
1632 /* now remove reference from buffer */
1633 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1634 Assert(found);
1635
1636 /* remove entries spilled to disk */
1637 if (rbtxn_is_serialized(txn))
1638 ReorderBufferRestoreCleanup(rb, txn);
1639
1640 /* deallocate */
1641 ReorderBufferFreeTXN(rb, txn);
1642}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_node, ReorderBufferTXN::catchange_node, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dclist_delete_from(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search(), ReorderBufferTXN::node, rbtxn_has_catalog_changes, rbtxn_is_known_subxact, rbtxn_is_serialized, rbtxn_is_streamed, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferFreeTXN(), ReorderBufferRestoreCleanup(), SnapBuildSnapDecRefcount(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::xid.

Referenced by ReorderBufferAbort(), ReorderBufferAbortOld(), ReorderBufferCleanupTXN(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferProcessTXN(), ReorderBufferReplay(), and ReorderBufferStreamCommit().

◆ ReorderBufferCommit()

void ReorderBufferCommit ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2882 of file reorderbuffer.c.

2886{
2887 ReorderBufferTXN *txn;
2888
2889 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2890 false);
2891
2892 /* unknown transaction, nothing to replay */
2893 if (txn == NULL)
2894 return;
2895
2896 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2897 origin_id, origin_lsn);
2898}

References fb(), InvalidXLogRecPtr, ReorderBufferReplay(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferCommitChild()

void ReorderBufferCommitChild ( ReorderBuffer rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn 
)

Definition at line 1220 of file reorderbuffer.c.

1223{
1224 ReorderBufferTXN *subtxn;
1225
1226 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1227 InvalidXLogRecPtr, false);
1228
1229 /*
1230 * No need to do anything if that subtxn didn't contain any changes
1231 */
1232 if (!subtxn)
1233 return;
1234
1235 subtxn->final_lsn = commit_lsn;
1236 subtxn->end_lsn = end_lsn;
1237
1238 /*
1239 * Assign this subxact as a child of the toplevel xact (no-op if already
1240 * done.)
1241 */
1242 ReorderBufferAssignChild(rb, xid, subxid, InvalidXLogRecPtr);
1243}

References fb(), InvalidXLogRecPtr, ReorderBufferAssignChild(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit(), and DecodePrepare().

◆ ReorderBufferCopySnap()

static Snapshot ReorderBufferCopySnap ( ReorderBuffer rb,
Snapshot  orig_snap,
ReorderBufferTXN txn,
CommandId  cid 
)
static

Definition at line 1910 of file reorderbuffer.c.

1912{
1913 Snapshot snap;
1914 dlist_iter iter;
1915 int i = 0;
1916 Size size;
1917
1918 size = sizeof(SnapshotData) +
1919 sizeof(TransactionId) * orig_snap->xcnt +
1920 sizeof(TransactionId) * (txn->nsubtxns + 1);
1921
1922 snap = MemoryContextAllocZero(rb->context, size);
1923 memcpy(snap, orig_snap, sizeof(SnapshotData));
1924
1925 snap->copied = true;
1926 snap->active_count = 1; /* mark as active so nobody frees it */
1927 snap->regd_count = 0;
1928 snap->xip = (TransactionId *) (snap + 1);
1929
1930 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1931
1932 /*
1933 * snap->subxip contains all txids that belong to our transaction which we
1934 * need to check via cmin/cmax. That's why we store the toplevel
1935 * transaction in there as well.
1936 */
1937 snap->subxip = snap->xip + snap->xcnt;
1938 snap->subxip[i++] = txn->xid;
1939
1940 /*
1941 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1942 * manually. Since it's an upper boundary it is safe to use it for the
1943 * allocation above.
1944 */
1945 snap->subxcnt = 1;
1946
1947 dlist_foreach(iter, &txn->subtxns)
1948 {
1949 ReorderBufferTXN *sub_txn;
1950
1951 sub_txn = dlist_container(ReorderBufferTXN, node, iter.cur);
1952 snap->subxip[i++] = sub_txn->xid;
1953 snap->subxcnt++;
1954 }
1955
1956 /* sort so we can bsearch() later */
1957 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1958
1959 /* store the specified current CommandId */
1960 snap->curcid = cid;
1961
1962 return snap;
1963}

References dlist_iter::cur, dlist_container, dlist_foreach, fb(), i, MemoryContextAllocZero(), ReorderBufferTXN::nsubtxns, qsort, ReorderBufferTXN::subtxns, ReorderBufferTXN::xid, and xidComparator().

Referenced by ReorderBufferProcessTXN(), ReorderBufferSaveTXNSnapshot(), and ReorderBufferStreamTXN().

◆ ReorderBufferExecuteInvalidations()

static void ReorderBufferExecuteInvalidations ( uint32  nmsgs,
SharedInvalidationMessage msgs 
)
static

Definition at line 3639 of file reorderbuffer.c.

3640{
3641 int i;
3642
3643 for (i = 0; i < nmsgs; i++)
3644 LocalExecuteInvalidationMessage(&msgs[i]);
3645}

References i, and LocalExecuteInvalidationMessage().

Referenced by ReorderBufferFinishPrepared(), and ReorderBufferProcessTXN().

◆ ReorderBufferFinishPrepared()

void ReorderBufferFinishPrepared ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
XLogRecPtr  two_phase_at,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn,
char gid,
bool  is_commit 
)

Definition at line 2999 of file reorderbuffer.c.

3004{
3005 ReorderBufferTXN *txn;
3006 XLogRecPtr prepare_end_lsn;
3007 TimestampTz prepare_time;
3008
3009 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3010
3011 /* unknown transaction, nothing to do */
3012 if (txn == NULL)
3013 return;
3014
3015 /*
3016 * By this time the txn has the prepare record information, remember it to
3017 * be later used for rollback.
3018 */
3019 prepare_end_lsn = txn->end_lsn;
3020 prepare_time = txn->prepare_time;
3021
3022 /* add the gid in the txn */
3023 txn->gid = pstrdup(gid);
3024
3025 /*
3026 * It is possible that this transaction is not decoded at prepare time
3027 * either because by that time we didn't have a consistent snapshot, or
3028 * two_phase was not enabled, or it was decoded earlier but we have
3029 * restarted. We only need to send the prepare if it was not decoded
3030 * earlier. We don't need to decode the xact for aborts if it is not done
3031 * already.
3032 */
3033 if ((txn->final_lsn < two_phase_at) && is_commit)
3034 {
3035 /*
3036 * txn must have been marked as a prepared transaction and skipped but
3037 * not sent a prepare. Also, the prepare info must have been updated
3038 * in txn even if we skip prepare.
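3039 */
3040 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) ==
3041 (RBTXN_IS_PREPARED | RBTXN_SKIPPED_PREPARE));
3042 Assert(XLogRecPtrIsValid(txn->final_lsn));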
3043
3044 /*
3045 * By this time the txn has the prepare record information and it is
3046 * important to use that so that downstream gets the accurate
3047 * information. If instead, we have passed commit information here
3048 * then downstream can behave as it has already replayed commit
3049 * prepared after the restart.
3050 */
3051 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3052 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3053 }
3054
3055 txn->final_lsn = commit_lsn;
3056 txn->end_lsn = end_lsn;
3057 txn->commit_time = commit_time;
3058 txn->origin_id = origin_id;
3059 txn->origin_lsn = origin_lsn;
3060
3061 if (is_commit)
3062 rb->commit_prepared(rb, txn, commit_lsn);
3063 else
3064 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3065
3066 /* cleanup: make sure there's no cache pollution */
3067 ReorderBufferExecuteInvalidations(txn->ninvalidations,
3068 txn->invalidations);
3069 ReorderBufferCleanupTXN(rb, txn);
3070}

References Assert, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, ReorderBufferTXN::invalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferCleanupTXN(), ReorderBufferExecuteInvalidations(), ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeAbort(), and DecodeCommit().

◆ ReorderBufferForget()

void ReorderBufferForget ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3178 of file reorderbuffer.c.

3179{
3180 ReorderBufferTXN *txn;
3181
3182 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3183 false);
3184
3185 /* unknown, nothing to forget */
3186 if (txn == NULL)
3187 return;
3188
3189 /* this transaction mustn't be streamed */
3190 Assert(!rbtxn_is_streamed(txn));
3191
3192 /* cosmetic... */
3193 txn->final_lsn = lsn;
3194
3195 /*
3196 * Process only cache invalidation messages in this transaction if there
3197 * are any. Even if we're not interested in the transaction's contents, it
3198 * could have manipulated the catalog and we need to update the caches
3199 * according to that.
3200 */
3201 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3202 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3203 txn->invalidations);
3204 else
3205 Assert(txn->ninvalidations == 0);
3206
3207 /* remove potential on-disk data, and deallocate */
3208 ReorderBufferCleanupTXN(rb, txn);
3209}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferFree()

void ReorderBufferFree ( ReorderBuffer rb)

Definition at line 418 of file reorderbuffer.c.

419{
420 MemoryContext context = rb->context;
421
422 /*
423 * We free separately allocated data by entirely scrapping reorderbuffer's
424 * memory context.
425 */
426 MemoryContextDelete(context);
427
428 /* Free disk space used by unconsumed reorder buffers */
429 ReorderBufferCleanupSerializedTXNs(NameStr(MyReplicationSlot->data.name));
430}

References ReplicationSlot::data, fb(), MemoryContextDelete(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, and ReorderBufferCleanupSerializedTXNs().

Referenced by FreeDecodingContext().

◆ ReorderBufferFreeChange()

void ReorderBufferFreeChange ( ReorderBuffer rb,
ReorderBufferChange change,
bool  upd_mem 
)

Definition at line 523 of file reorderbuffer.c.

525{
526 /* update memory accounting info */
527 if (upd_mem)
528 ReorderBufferChangeMemoryUpdate(rb, change, NULL, false,
529 ReorderBufferChangeSize(change));
530
531 /* free contained data */
532 switch (change->action)
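533 {
534 case REORDER_BUFFER_CHANGE_INSERT:
535 case REORDER_BUFFER_CHANGE_UPDATE:
536 case REORDER_BUFFER_CHANGE_DELETE:
537 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
538 if (change->data.tp.newtuple)
539 {
540 ReorderBufferFreeTupleBuf(change->data.tp.newtuple);
541 change->data.tp.newtuple = NULL;
542 }
543
544 if (change->data.tp.oldtuple)
545 {
546 ReorderBufferFreeTupleBuf(change->data.tp.oldtuple);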
547 change->data.tp.oldtuple = NULL;
548 }
549 break;
550 case REORDER_BUFFER_CHANGE_MESSAGE:
551 if (change->data.msg.prefix != NULL)
552 pfree(change->data.msg.prefix);
553 change->data.msg.prefix = NULL;
554 if (change->data.msg.message != NULL)
555 pfree(change->data.msg.message);
556 change->data.msg.message = NULL;
557 break;
558 case REORDER_BUFFER_CHANGE_INVALIDATION:
559 if (change->data.inval.invalidations)
560 pfree(change->data.inval.invalidations);
561 change->data.inval.invalidations = NULL;
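562 break;
563 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
564 if (change->data.snapshot)
565 {
566 ReorderBufferFreeSnap(rb, change->data.snapshot);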
567 change->data.snapshot = NULL;
568 }
569 break;
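570 /* no data in addition to the struct itself */
571 case REORDER_BUFFER_CHANGE_TRUNCATE:
572 if (change->data.truncate.relids != NULL)
573 {
574 ReorderBufferFreeRelids(rb, change->data.truncate.relids);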
575 change->data.truncate.relids = NULL;
576 }
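577 break;
578 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
579 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
580 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
581 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: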
582 break;
583 }
584
585 pfree(change);
586}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::message, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::oldtuple, pfree(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeRelids(), ReorderBufferFreeSnap(), ReorderBufferFreeTupleBuf(), ReorderBufferChange::snapshot, ReorderBufferChange::tp, and ReorderBufferChange::truncate.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNNext(), ReorderBufferProcessTXN(), ReorderBufferQueueChange(), ReorderBufferResetTXN(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), ReorderBufferToastReset(), and ReorderBufferTruncateTXN().

◆ ReorderBufferFreeRelids()

void ReorderBufferFreeRelids ( ReorderBuffer rb,
Oid relids 
)

Definition at line 642 of file reorderbuffer.c.

643{
644 pfree(relids);
645}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeSnap()

static void ReorderBufferFreeSnap ( ReorderBuffer rb,
Snapshot  snap 
)
static

Definition at line 1969 of file reorderbuffer.c.

1970{
1971 if (snap->copied)
1972 pfree(snap);
1973 else
1974 SnapBuildSnapDecRefcount(snap);
1975}

References fb(), pfree(), and SnapBuildSnapDecRefcount().

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferProcessTXN(), and ReorderBufferStreamTXN().

◆ ReorderBufferFreeTupleBuf()

void ReorderBufferFreeTupleBuf ( HeapTuple  tuple)

Definition at line 611 of file reorderbuffer.c.

612{
613 pfree(tuple);
614}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeTXN()

static void ReorderBufferFreeTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 460 of file reorderbuffer.c.

461{
462 /* clean the lookup cache if we were cached (quite likely) */
463 if (rb->by_txn_last_xid == txn->xid)
464 {
465 rb->by_txn_last_xid = InvalidTransactionId;
466 rb->by_txn_last_txn = NULL;
467 }
468
469 /* free data that's contained */
470
471 if (txn->gid != NULL)
472 {
473 pfree(txn->gid);
474 txn->gid = NULL;
475 }
476
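477 if (txn->tuplecid_hash != NULL)
478 {
479 hash_destroy(txn->tuplecid_hash);
480 txn->tuplecid_hash = NULL;
481 }
482
483 if (txn->invalidations)
484 {
485 pfree(txn->invalidations);
486 txn->invalidations = NULL;
487 }
488
489 if (txn->invalidations_distributed)
490 {
491 pfree(txn->invalidations_distributed);
492 txn->invalidations_distributed = NULL;
493 }
494
495 /* Reset the toast hash */
496 ReorderBufferToastReset(rb, txn);
497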
498 /* All changes must be deallocated */
499 Assert(txn->size == 0);
500
501 pfree(txn);
502}

References Assert, fb(), ReorderBufferTXN::gid, hash_destroy(), ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidTransactionId, pfree(), ReorderBufferToastReset(), ReorderBufferTXN::size, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCleanupTXN().

◆ ReorderBufferGetCatalogChangesXacts()

TransactionId * ReorderBufferGetCatalogChangesXacts ( ReorderBuffer rb)

Definition at line 3689 of file reorderbuffer.c.

3690{
3691 dlist_iter iter;
3692 TransactionId *xids = NULL;
3693 size_t xcnt = 0;
3694
3695 /* Quick return if the list is empty */
3696 if (dclist_count(&rb->catchange_txns) == 0)
3697 return NULL;
3698
3699 /* Initialize XID array */
3700 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
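3701 dclist_foreach(iter, &rb->catchange_txns)
3702 {
3703 ReorderBufferTXN *txn = dclist_container(ReorderBufferTXN,
3704 catchange_node,
3705 iter.cur);
3706
3707 Assert(rbtxn_has_catalog_changes(txn));
3708
3709 xids[xcnt++] = txn->xid;
3710 }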
3711
3712 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3713
3714 Assert(xcnt == dclist_count(&rb->catchange_txns));
3715 return xids;
3716}

References Assert, dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, fb(), palloc_array, qsort, rbtxn_has_catalog_changes, ReorderBufferTXN::xid, and xidComparator().

Referenced by SnapBuildSerialize().

◆ ReorderBufferGetInvalidations()

uint32 ReorderBufferGetInvalidations ( ReorderBuffer rb,
TransactionId  xid,
SharedInvalidationMessage **  msgs 
)

Definition at line 5629 of file reorderbuffer.c.

5631{
5632 ReorderBufferTXN *txn;
5633
5634 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5635 false);
5636
5637 if (txn == NULL)
5638 return 0;
5639
5640 *msgs = txn->invalidations;
5641
5642 return txn->ninvalidations;
5643}

References fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, and ReorderBufferTXNByXid().

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferGetOldestTXN()

ReorderBufferTXN * ReorderBufferGetOldestTXN ( ReorderBuffer rb)

Definition at line 1045 of file reorderbuffer.c.

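1046{
1047 ReorderBufferTXN *txn;
1048
1049 AssertTXNLsnOrder(rb);
1050
1051 if (dlist_is_empty(&rb->toplevel_by_lsn))
1052 return NULL;
1053
1054 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1055
1056 Assert(!rbtxn_is_known_subxact(txn));
1057 Assert(XLogRecPtrIsValid(txn->first_lsn));
1058 return txn;
1059}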

References Assert, AssertTXNLsnOrder(), dlist_head_element, dlist_is_empty(), fb(), ReorderBufferTXN::first_lsn, rbtxn_is_known_subxact, and XLogRecPtrIsValid.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferGetOldestXmin()

TransactionId ReorderBufferGetOldestXmin ( ReorderBuffer rb)

Definition at line 1073 of file reorderbuffer.c.

1074{
1075 ReorderBufferTXN *txn;
1076
1077 AssertTXNLsnOrder(rb);
1078
1079 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1080 return InvalidTransactionId;
1081
1082 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1083 &rb->txns_by_base_snapshot_lsn);
1084 return txn->base_snapshot->xmin;
1085}

References AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, dlist_head_element, dlist_is_empty(), fb(), InvalidTransactionId, and SnapshotData::xmin.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferImmediateInvalidation()

void ReorderBufferImmediateInvalidation ( ReorderBuffer * rb,
uint32  ninvalidations,
SharedInvalidationMessage * invalidations 
)

Definition at line 3251 of file reorderbuffer.c.

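3253{
3254 bool use_subtxn = IsTransactionOrTransactionBlock();
3255 MemoryContext ccxt = CurrentMemoryContext;
3256 ResourceOwner cowner = CurrentResourceOwner;
3257 int i;
3258
3259 if (use_subtxn)
3260 BeginInternalSubTransaction("replay");
3261
3262 /*
3263 * Force invalidations to happen outside of a valid transaction - that way
3264 * entries will just be marked as invalid without accessing the catalog.
3265 * That's advantageous because we don't need to setup the full state
3266 * necessary for catalog access.
3267 */
3268 if (use_subtxn)
3269 AbortCurrentTransaction();
3270
3271 for (i = 0; i < ninvalidations; i++)
3272 LocalExecuteInvalidationMessage(&invalidations[i]);
3273
3274 if (use_subtxn)
3275 {
3276 RollbackAndReleaseCurrentSubTransaction();
3277 MemoryContextSwitchTo(ccxt);
3278 CurrentResourceOwner = cowner;
3279 }
3280}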

References AbortCurrentTransaction(), BeginInternalSubTransaction(), CurrentMemoryContext, CurrentResourceOwner, fb(), i, IsTransactionOrTransactionBlock(), LocalExecuteInvalidationMessage(), MemoryContextSwitchTo(), and RollbackAndReleaseCurrentSubTransaction().

Referenced by ReorderBufferAbort(), ReorderBufferForget(), ReorderBufferInvalidate(), and xact_decode().

◆ ReorderBufferInvalidate()

void ReorderBufferInvalidate ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3220 of file reorderbuffer.c.

3221{
3222 ReorderBufferTXN *txn;
3223
3224 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3225 false);
3226
3227 /* unknown, nothing to do */
3228 if (txn == NULL)
3229 return;
3230
3231 /*
3232 * Process cache invalidation messages if there are any. Even if we're not
3233 * interested in the transaction's contents, it could have manipulated the
3234 * catalog and we need to update the caches according to that.
3235 */
3236 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3237 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3238 txn->invalidations);
3239 else
3240 Assert(txn->ninvalidations == 0);
3241}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodePrepare().

◆ ReorderBufferIterCompare()

static int ReorderBufferIterCompare ( Datum  a,
Datum  b,
void * arg 
)
static

Definition at line 1262 of file reorderbuffer.c.

1263{
1264 ReorderBufferIterTXNState *state = (ReorderBufferIterTXNState *) arg;
1265 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1266 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1267
1268 if (pos_a < pos_b)
1269 return 1;
1270 else if (pos_a == pos_b)
1271 return 0;
1272 return -1;
1273}

References a, arg, b, DatumGetInt32(), and fb().

Referenced by ReorderBufferIterTXNInit().

◆ ReorderBufferIterTXNFinish()

static void ReorderBufferIterTXNFinish ( ReorderBuffer rb,
ReorderBufferIterTXNState state 
)
static

Definition at line 1505 of file reorderbuffer.c.

1507{
1508 int32 off;
1509
1510 for (off = 0; off < state->nr_txns; off++)
1511 {
1512 if (state->entries[off].file.vfd != -1)
1513 FileClose(state->entries[off].file.vfd);
1514 }
1515
1516 /* free memory we might have "leaked" in the last *Next call */
1517 if (!dlist_is_empty(&state->old_change))
1518 {
1519 ReorderBufferChange *change;
1520
1521 change = dlist_container(ReorderBufferChange, node,
1522 dlist_pop_head_node(&state->old_change));
1523 ReorderBufferFreeChange(rb, change, true);
1524 Assert(dlist_is_empty(&state->old_change));
1525 }
1526
1527 binaryheap_free(state->heap);
1528 pfree(state);
1529}

References Assert, binaryheap_free(), dlist_container, dlist_is_empty(), dlist_pop_head_node(), fb(), FileClose(), pfree(), and ReorderBufferFreeChange().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNInit()

static void ReorderBufferIterTXNInit ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
ReorderBufferIterTXNState *volatile * iter_state 
)
static

Definition at line 1285 of file reorderbuffer.c.

1287{
1288 Size nr_txns = 0;
1289 ReorderBufferIterTXNState *state;
1290 dlist_iter cur_txn_i;
1291 int32 off;
1292
1293 *iter_state = NULL;
1294
1295 /* Check ordering of changes in the toplevel transaction. */
1296 AssertChangeLsnOrder(txn);
1297
1298 /*
1299 * Calculate the size of our heap: one element for every transaction that
1300 * contains changes. (Besides the transactions already in the reorder
1301 * buffer, we count the one we were directly passed.)
1302 */
1303 if (txn->nentries > 0)
1304 nr_txns++;
1305
1306 dlist_foreach(cur_txn_i, &txn->subtxns)
1307 {
1308 ReorderBufferTXN *cur_txn;
1309
1310 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1311
1312 /* Check ordering of changes in this subtransaction. */
1313 AssertChangeLsnOrder(cur_txn);
1314
1315 if (cur_txn->nentries > 0)
1316 nr_txns++;
1317 }
1318
1319 /* allocate iteration state */
1320 state = (ReorderBufferIterTXNState *)
1321 MemoryContextAllocZero(rb->context,
1322 sizeof(ReorderBufferIterTXNState) +
1323 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1324
1325 state->nr_txns = nr_txns;
1326 dlist_init(&state->old_change);
1327
1328 for (off = 0; off < state->nr_txns; off++)
1329 {
1330 state->entries[off].file.vfd = -1;
1331 state->entries[off].segno = 0;
1332 }
1333
1334 /* allocate heap */
1335 state->heap = binaryheap_allocate(state->nr_txns,
1336 ReorderBufferIterCompare,
1337 state);
1338
1339 /* Now that the state fields are initialized, it is safe to return it. */
1340 *iter_state = state;
1341
1342 /*
1343 * Now insert items into the binary heap, in an unordered fashion. (We
1344 * will run a heap assembly step at the end; this is more efficient.)
1345 */
1346
1347 off = 0;
1348
1349 /* add toplevel transaction if it contains changes */
1350 if (txn->nentries > 0)
1351 {
1352 ReorderBufferChange *cur_change;
1353
1354 if (rbtxn_is_serialized(txn))
1355 {
1356 /* serialize remaining changes */
1357 ReorderBufferSerializeTXN(rb, txn);
1358 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1359 &state->entries[off].segno);
1360 }
1361
1362 cur_change = dlist_head_element(ReorderBufferChange, node,
1363 &txn->changes);
1364
1365 state->entries[off].lsn = cur_change->lsn;
1366 state->entries[off].change = cur_change;
1367 state->entries[off].txn = txn;
1368
1369 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1370 }
1371
1372 /* add subtransactions if they contain changes */
1373 dlist_foreach(cur_txn_i, &txn->subtxns)
1374 {
1375 ReorderBufferTXN *cur_txn;
1376
1377 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1378
1379 if (cur_txn->nentries > 0)
1380 {
1381 ReorderBufferChange *cur_change;
1382
1383 if (rbtxn_is_serialized(cur_txn))
1384 {
1385 /* serialize remaining changes */
1386 ReorderBufferSerializeTXN(rb, cur_txn);
1387 ReorderBufferRestoreChanges(rb, cur_txn,
1388 &state->entries[off].file,
1389 &state->entries[off].segno);
1390 }
1391 cur_change = dlist_head_element(ReorderBufferChange, node,
1392 &cur_txn->changes);
1393
1394 state->entries[off].lsn = cur_change->lsn;
1395 state->entries[off].change = cur_change;
1396 state->entries[off].txn = cur_txn;
1397
1398 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1399 }
1400 }
1401
1402 /* assemble a valid binary heap */
1403 binaryheap_build(state->heap);
1404}

References AssertChangeLsnOrder(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), ReorderBufferTXN::changes, dlist_container, dlist_foreach, dlist_head_element, dlist_init(), fb(), Int32GetDatum(), MemoryContextAllocZero(), ReorderBufferTXN::nentries, rbtxn_is_serialized, ReorderBufferIterCompare(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), and ReorderBufferTXN::subtxns.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNNext()

static ReorderBufferChange * ReorderBufferIterTXNNext ( ReorderBuffer rb,
ReorderBufferIterTXNState state 
)
static

Definition at line 1413 of file reorderbuffer.c.

1414{
1415 ReorderBufferChange *change;
1416 ReorderBufferIterTXNEntry *entry;
1417 int32 off;
1418
1419 /* nothing there anymore */
1420 if (binaryheap_empty(state->heap))
1421 return NULL;
1422
1423 off = DatumGetInt32(binaryheap_first(state->heap));
1424 entry = &state->entries[off];
1425
1426 /* free memory we might have "leaked" in the previous *Next call */
1427 if (!dlist_is_empty(&state->old_change))
1428 {
1429 change = dlist_container(ReorderBufferChange, node,
1430 dlist_pop_head_node(&state->old_change));
1431 ReorderBufferFreeChange(rb, change, true);
1432 Assert(dlist_is_empty(&state->old_change));
1433 }
1434
1435 change = entry->change;
1436
1437 /*
1438 * update heap with information about which transaction has the next
1439 * relevant change in LSN order
1440 */
1441
1442 /* there are in-memory changes */
1443 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1444 {
1445 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1446 ReorderBufferChange *next_change =
1447 dlist_container(ReorderBufferChange, node, next);
1448
1449 /* txn stays the same */
1450 state->entries[off].lsn = next_change->lsn;
1451 state->entries[off].change = next_change;
1452
1453 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1454 return change;
1455 }
1456
1457 /* try to load changes from disk */
1458 if (entry->txn->nentries != entry->txn->nentries_mem)
1459 {
1460 /*
1461 * Ugly: restoring changes will reuse *Change records, thus delete the
1462 * current one from the per-tx list and only free in the next call.
1463 */
1464 dlist_delete(&change->node);
1465 dlist_push_tail(&state->old_change, &change->node);
1466
1467 /*
1468 * Update the total bytes processed by the txn for which we are
1469 * releasing the current set of changes and restoring the new set of
1470 * changes.
1471 */
1472 rb->totalBytes += entry->txn->size;
1473 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1474 &state->entries[off].segno))
1475 {
1476 /* successfully restored changes from disk */
1477 ReorderBufferChange *next_change =
1478 dlist_head_element(ReorderBufferChange, node,
1479 &entry->txn->changes);
1480
1481 elog(DEBUG2, "restored %u/%u changes from disk",
1482 (uint32) entry->txn->nentries_mem,
1483 (uint32) entry->txn->nentries);
1484
1485 Assert(entry->txn->nentries_mem);
1486 /* txn stays the same */
1487 state->entries[off].lsn = next_change->lsn;
1488 state->entries[off].change = next_change;
1489 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1490
1491 return change;
1492 }
1493 }
1494
1495 /* ok, no changes there anymore, remove */
1496 binaryheap_remove_first(state->heap);
1497
1498 return change;
1499}

References Assert, binaryheap_empty, binaryheap_first(), binaryheap_remove_first(), binaryheap_replace_first(), ReorderBufferIterTXNEntry::change, ReorderBufferTXN::changes, DatumGetInt32(), DEBUG2, dlist_container, dlist_delete(), dlist_has_next(), dlist_head_element, dlist_is_empty(), dlist_next_node(), dlist_pop_head_node(), dlist_push_tail(), elog, fb(), ReorderBufferIterTXNEntry::file, Int32GetDatum(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, next, ReorderBufferChange::node, ReorderBufferFreeChange(), ReorderBufferRestoreChanges(), ReorderBufferTXN::size, and ReorderBufferIterTXNEntry::txn.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferLargestStreamableTopTXN()

static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN ( ReorderBuffer rb)
static

Definition at line 3844 of file reorderbuffer.c.

3845{
3846 dlist_iter iter;
3847 Size largest_size = 0;
3848 ReorderBufferTXN *largest = NULL;
3849
3850 /* Find the largest top-level transaction having a base snapshot. */
3851 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3852 {
3853 ReorderBufferTXN *txn;
3854
3855 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3856
3857 /* must not be a subtxn */
3858 Assert(!rbtxn_is_known_subxact(txn));
3859 /* base_snapshot must be set */
3860 Assert(txn->base_snapshot != NULL);
3861
3862 /* Don't consider these kinds of transactions for eviction. */
3863 if (rbtxn_has_partial_change(txn) ||
3864 !rbtxn_has_streamable_change(txn) ||
3865 rbtxn_is_aborted(txn))
3866 continue;
3867
3868 /* Find the largest of the eviction candidates. */
3869 if ((largest == NULL || txn->total_size > largest_size) &&
3870 (txn->total_size > 0))
3871 {
3872 largest = txn;
3873 largest_size = txn->total_size;
3874 }
3875 }
3876
3877 return largest;
3878}

References Assert, ReorderBufferTXN::base_snapshot, dlist_iter::cur, dlist_container, dlist_foreach, fb(), rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_aborted, rbtxn_is_known_subxact, and ReorderBufferTXN::total_size.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferLargestTXN()

static ReorderBufferTXN * ReorderBufferLargestTXN ( ReorderBuffer rb)
static

Definition at line 3803 of file reorderbuffer.c.

3804{
3805 ReorderBufferTXN *largest;
3806
3807 /* Get the largest transaction from the max-heap */
3808 largest = pairingheap_container(ReorderBufferTXN, txn_node,
3809 pairingheap_first(rb->txn_heap));
3810
3811 Assert(largest);
3812 Assert(largest->size > 0);
3813 Assert(largest->size <= rb->size);
3814
3815 return largest;
3816}

References Assert, fb(), pairingheap_container, and pairingheap_first().

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferMaybeMarkTXNStreamed()

static void ReorderBufferMaybeMarkTXNStreamed ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 2139 of file reorderbuffer.c.

2140{
2141 /*
2142 * The top-level transaction, is marked as streamed always, even if it
2143 * does not contain any changes (that is, when all the changes are in
2144 * subtransactions).
2145 *
2146 * For subtransactions, we only mark them as streamed when there are
2147 * changes in them.
2148 *
2149 * We do it this way because of aborts - we don't want to send aborts for
2150 * XIDs the downstream is not aware of. And of course, it always knows
2151 * about the top-level xact (we send the XID in all messages), but we
2152 * never stream XIDs of empty subxacts.
2153 */
2154 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2155 txn->txn_flags |= RBTXN_IS_STREAMED;
2156}

References ReorderBufferTXN::nentries_mem, RBTXN_IS_STREAMED, rbtxn_is_toptxn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferPrepare()

void ReorderBufferPrepare ( ReorderBuffer * rb,
TransactionId  xid,
char * gid 
)

Definition at line 2958 of file reorderbuffer.c.

2960{
2961 ReorderBufferTXN *txn;
2962
2963 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2964 false);
2965
2966 /* unknown transaction, nothing to replay */
2967 if (txn == NULL)
2968 return;
2969
2970 /*
2971 * txn must have been marked as a prepared transaction and must have
2972 * neither been skipped nor sent a prepare. Also, the prepare info must
2973 * have been updated in it by now.
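2974 */
2975 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) == RBTXN_IS_PREPARED);
2976 Assert(XLogRecPtrIsValid(txn->final_lsn));
2977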
2978 txn->gid = pstrdup(gid);
2979
2980 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2981 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2982
2983 /*
2984 * Send a prepare if not already done so. This might occur if we have
2985 * detected a concurrent abort while replaying the non-streaming
2986 * transaction.
2987 */
2988 if (!rbtxn_sent_prepare(txn))
2989 {
2990 rb->prepare(rb, txn, txn->final_lsn);
2991 txn->txn_flags |= RBTXN_SENT_PREPARE;
2992 }
2993}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodePrepare().

◆ ReorderBufferProcessPartialChange()

static void ReorderBufferProcessPartialChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
ReorderBufferChange change,
bool  toast_insert 
)
static

Definition at line 742 of file reorderbuffer.c.

745{
746 ReorderBufferTXN *toptxn;
747
748 /*
749 * The partial changes need to be processed only while streaming
750 * in-progress transactions.
751 */
752 if (!ReorderBufferCanStream(rb))
753 return;
754
755 /* Get the top transaction. */
756 toptxn = rbtxn_get_toptxn(txn);
757
758 /*
759 * Indicate a partial change for toast inserts. The change will be
760 * considered as complete once we get the insert or update on the main
761 * table and we are sure that the pending toast chunks are not required
762 * anymore.
763 *
764 * If we allow streaming when there are pending toast chunks then such
765 * chunks won't be released till the insert (multi_insert) is complete and
766 * we expect the txn to have streamed all changes after streaming. This
767 * restriction is mainly to ensure the correctness of streamed
768 * transactions and it doesn't seem worth uplifting such a restriction
769 * just to allow this case because anyway we will stream the transaction
770 * once such an insert is complete.
771 */
772 if (toast_insert)
773 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
774 else if (rbtxn_has_partial_change(toptxn) &&
775 IsInsertOrUpdate(change->action) &&
776 change->data.tp.clear_toast_afterwards)
777 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
778
779 /*
780 * Indicate a partial change for speculative inserts. The change will be
781 * considered as complete once we get the speculative confirm or abort
782 * token.
783 */
784 if (IsSpecInsert(change->action))
785 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
786 else if (rbtxn_has_partial_change(toptxn) &&
787 IsSpecConfirmOrAbort(change->action))
788 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
789
790 /*
791 * Stream the transaction if it is serialized before and the changes are
792 * now complete in the top-level transaction.
793 *
794 * The reason for doing the streaming of such a transaction as soon as we
795 * get the complete change for it is that previously it would have reached
796 * the memory threshold and wouldn't get streamed because of incomplete
797 * changes. Delaying such transactions would increase apply lag for them.
798 */
799 if (ReorderBufferCanStartStreaming(rb) &&
800 !(rbtxn_has_partial_change(toptxn)) &&
801 rbtxn_is_serialized(txn) &&
802 rbtxn_has_streamable_change(toptxn))
803 ReorderBufferStreamTXN(rb, toptxn);
804}
References ReorderBufferChange::action, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::data, fb(), IsInsertOrUpdate, IsSpecConfirmOrAbort, IsSpecInsert, rbtxn_get_toptxn, RBTXN_HAS_PARTIAL_CHANGE, rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_serialized, ReorderBufferCanStartStreaming(), ReorderBufferCanStream(), ReorderBufferStreamTXN(), ReorderBufferChange::tp, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferProcessTXN()

static void ReorderBufferProcessTXN ( ReorderBuffer rb,
ReorderBufferTXN txn,
XLogRecPtr  commit_lsn,
volatile Snapshot  snapshot_now,
volatile CommandId  command_id,
bool  streaming 
)
static

Definition at line 2212 of file reorderbuffer.c.

2217{
2218 bool using_subtxn;
2224 volatile bool stream_started = false;
2225 ReorderBufferTXN *volatile curtxn = NULL;
2226
2227 /* build data to be able to lookup the CommandIds of catalog tuples */
2229
2230 /* setup the initial snapshot */
2231 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2232
2233 /*
2234 * Decoding needs access to syscaches et al., which in turn use
2235 * heavyweight locks and such. Thus we need to have enough state around to
2236 * keep track of those. The easiest way is to simply use a transaction
2237 * internally. That also allows us to easily enforce that nothing writes
2238 * to the database by checking for xid assignments.
2239 *
2240 * When we're called via the SQL SRF there's already a transaction
2241 * started, so start an explicit subtransaction there.
2242 */
2244
2245 PG_TRY();
2246 {
2247 ReorderBufferChange *change;
2248 int changes_count = 0; /* used to accumulate the number of
2249 * changes */
2250
2251 if (using_subtxn)
2252 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2253 else
2255
2256 /*
2257 * We only need to send begin/begin-prepare for non-streamed
2258 * transactions.
2259 */
2260 if (!streaming)
2261 {
2262 if (rbtxn_is_prepared(txn))
2263 rb->begin_prepare(rb, txn);
2264 else
2265 rb->begin(rb, txn);
2266 }
2267
2269 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2270 {
2271 Relation relation = NULL;
2272 Oid reloid;
2273
2275
2276 /*
2277 * We can't call start stream callback before processing first
2278 * change.
2279 */
2281 {
2282 if (streaming)
2283 {
2284 txn->origin_id = change->origin_id;
2285 rb->stream_start(rb, txn, change->lsn);
2286 stream_started = true;
2287 }
2288 }
2289
2290 /*
2291 * Enforce correct ordering of changes, merged from multiple
2292 * subtransactions. The changes may have the same LSN due to
2293 * MULTI_INSERT xlog records.
2294 */
2296
2297 prev_lsn = change->lsn;
2298
2299 /*
2300 * Set the current xid to detect concurrent aborts. This is
2301 * required for the cases when we decode the changes before the
2302 * COMMIT record is processed.
2303 */
2304 if (streaming || rbtxn_is_prepared(change->txn))
2305 {
2306 curtxn = change->txn;
2308 }
2309
2310 switch (change->action)
2311 {
2313
2314 /*
2315 * Confirmation for speculative insertion arrived. Simply
2316 * use as a normal record. It'll be cleaned up at the end
2317 * of INSERT processing.
2318 */
2319 if (specinsert == NULL)
2320 elog(ERROR, "invalid ordering of speculative insertion changes");
2321 Assert(specinsert->data.tp.oldtuple == NULL);
2322 change = specinsert;
2324
2325 /* intentionally fall through */
2330 Assert(snapshot_now);
2331
2332 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2333 change->data.tp.rlocator.relNumber);
2334
2335 /*
2336 * Mapped catalog tuple without data, emitted while
2337 * catalog table was in the process of being rewritten. We
2338 * can fail to look up the relfilenumber, because the
2339 * relmapper has no "historic" view, in contrast to the
2340 * normal catalog during decoding. Thus repeated rewrites
2341 * can cause a lookup failure. That's OK because we do not
2342 * decode catalog changes anyway. Normally such tuples
2343 * would be skipped over below, but we can't identify
2344 * whether the table should be logically logged without
2345 * mapping the relfilenumber to the oid.
2346 */
2347 if (reloid == InvalidOid &&
2348 change->data.tp.newtuple == NULL &&
2349 change->data.tp.oldtuple == NULL)
2350 goto change_done;
2351 else if (reloid == InvalidOid)
2352 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2353 relpathperm(change->data.tp.rlocator,
2354 MAIN_FORKNUM).str);
2355
2356 relation = RelationIdGetRelation(reloid);
2357
2358 if (!RelationIsValid(relation))
2359 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2360 reloid,
2361 relpathperm(change->data.tp.rlocator,
2362 MAIN_FORKNUM).str);
2363
2364 if (!RelationIsLogicallyLogged(relation))
2365 goto change_done;
2366
2367 /*
2368 * Ignore temporary heaps created during DDL unless the
2369 * plugin has asked for them.
2370 */
2371 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2372 goto change_done;
2373
2374 /*
2375 * For now ignore sequence changes entirely. Most of the
2376 * time they don't log changes using records we
2377 * understand, so it doesn't make sense to handle the few
2378 * cases we do.
2379 */
2380 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2381 goto change_done;
2382
2383 /* user-triggered change */
2384 if (!IsToastRelation(relation))
2385 {
2386 ReorderBufferToastReplace(rb, txn, relation, change);
2387 ReorderBufferApplyChange(rb, txn, relation, change,
2388 streaming);
2389
2390 /*
2391 * Only clear reassembled toast chunks if we're sure
2392 * they're not required anymore. The creator of the
2393 * tuple tells us.
2394 */
2395 if (change->data.tp.clear_toast_afterwards)
2397 }
2398 /* we're not interested in toast deletions */
2399 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2400 {
2401 /*
2402 * Need to reassemble the full toasted Datum in
2403 * memory, to ensure the chunks don't get reused till
2404 * we're done remove it from the list of this
2405 * transaction's changes. Otherwise it will get
2406 * freed/reused while restoring spooled data from
2407 * disk.
2408 */
2409 Assert(change->data.tp.newtuple != NULL);
2410
2411 dlist_delete(&change->node);
2412 ReorderBufferToastAppendChunk(rb, txn, relation,
2413 change);
2414 }
2415
2417
2418 /*
2419 * If speculative insertion was confirmed, the record
2420 * isn't needed anymore.
2421 */
2422 if (specinsert != NULL)
2423 {
2425 specinsert = NULL;
2426 }
2427
2428 if (RelationIsValid(relation))
2429 {
2430 RelationClose(relation);
2431 relation = NULL;
2432 }
2433 break;
2434
2436
2437 /*
2438 * Speculative insertions are dealt with by delaying the
2439 * processing of the insert until the confirmation record
2440 * arrives. For that we simply unlink the record from the
2441 * chain, so it does not get freed/reused while restoring
2442 * spooled data from disk.
2443 *
2444 * This is safe in the face of concurrent catalog changes
2445 * because the relevant relation can't be changed between
2446 * speculative insertion and confirmation due to
2447 * CheckTableNotInUse() and locking.
2448 */
2449
2450 /* Previous speculative insertion must be aborted */
2452
2453 /* and memorize the pending insertion */
2454 dlist_delete(&change->node);
2455 specinsert = change;
2456 break;
2457
2459
2460 /*
2461 * Abort for speculative insertion arrived. So cleanup the
2462 * specinsert tuple and toast hash.
2463 *
2464 * Note that we get the spec abort change for each toast
2465 * entry but we need to perform the cleanup only the first
2466 * time we get it for the main table.
2467 */
2468 if (specinsert != NULL)
2469 {
2470 /*
2471 * We must clean the toast hash before processing a
2472 * completely new tuple to avoid confusion about the
2473 * previous tuple's toast chunks.
2474 */
2477
2478 /* We don't need this record anymore. */
2480 specinsert = NULL;
2481 }
2482 break;
2483
2485 {
2486 int i;
2487 int nrelids = change->data.truncate.nrelids;
2488 int nrelations = 0;
2489 Relation *relations;
2490
2491 relations = palloc0_array(Relation, nrelids);
2492 for (i = 0; i < nrelids; i++)
2493 {
2494 Oid relid = change->data.truncate.relids[i];
2495 Relation rel;
2496
2497 rel = RelationIdGetRelation(relid);
2498
2499 if (!RelationIsValid(rel))
2500 elog(ERROR, "could not open relation with OID %u", relid);
2501
2502 if (!RelationIsLogicallyLogged(rel))
2503 continue;
2504
2505 relations[nrelations++] = rel;
2506 }
2507
2508 /* Apply the truncate. */
2510 relations, change,
2511 streaming);
2512
2513 for (i = 0; i < nrelations; i++)
2514 RelationClose(relations[i]);
2515
2516 break;
2517 }
2518
2520 ReorderBufferApplyMessage(rb, txn, change, streaming);
2521 break;
2522
2524 /* Execute the invalidation messages locally */
2526 change->data.inval.invalidations);
2527 break;
2528
2530 /* get rid of the old */
2532
2533 if (snapshot_now->copied)
2534 {
2535 ReorderBufferFreeSnap(rb, snapshot_now);
2536 snapshot_now =
2538 txn, command_id);
2539 }
2540
2541 /*
2542 * Restored from disk, need to be careful not to double
2543 * free. We could introduce refcounting for that, but for
2544 * now this seems infrequent enough not to care.
2545 */
2546 else if (change->data.snapshot->copied)
2547 {
2548 snapshot_now =
2550 txn, command_id);
2551 }
2552 else
2553 {
2554 snapshot_now = change->data.snapshot;
2555 }
2556
2557 /* and continue with the new one */
2558 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2559 break;
2560
2563
2564 if (command_id < change->data.command_id)
2565 {
2566 command_id = change->data.command_id;
2567
2568 if (!snapshot_now->copied)
2569 {
2570 /* we don't use the global one anymore */
2571 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2572 txn, command_id);
2573 }
2574
2575 snapshot_now->curcid = command_id;
2576
2578 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2579 }
2580
2581 break;
2582
2584 elog(ERROR, "tuplecid value in changequeue");
2585 break;
2586 }
2587
2588 /*
2589 * It is possible that the data is not sent to downstream for a
2590 * long time either because the output plugin filtered it or there
2591 * is a DDL that generates a lot of data that is not processed by
2592 * the plugin. So, in such cases, the downstream can timeout. To
2593 * avoid that we try to send a keepalive message if required.
2594 * Trying to send a keepalive message after every change has some
2595 * overhead, but testing showed there is no noticeable overhead if
2596 * we do it after every ~100 changes.
2597 */
2598#define CHANGES_THRESHOLD 100
2599
2601 {
2602 rb->update_progress_txn(rb, txn, prev_lsn);
2603 changes_count = 0;
2604 }
2605 }
2606
2607 /* speculative insertion record must be freed by now */
2609
2610 /* clean up the iterator */
2612 iterstate = NULL;
2613
2614 /*
2615 * Update total transaction count and total bytes processed by the
2616 * transaction and its subtransactions. Ensure to not count the
2617 * streamed transaction multiple times.
2618 *
2619 * Note that the statistics computation has to be done after
2620 * ReorderBufferIterTXNFinish as it releases the serialized change
2621 * which we have already accounted in ReorderBufferIterTXNNext.
2622 */
2623 if (!rbtxn_is_streamed(txn))
2624 rb->totalTxns++;
2625
2626 rb->totalBytes += txn->total_size;
2627
2628 /*
2629 * Done with current changes, send the last message for this set of
2630 * changes depending upon streaming mode.
2631 */
2632 if (streaming)
2633 {
2634 if (stream_started)
2635 {
2636 rb->stream_stop(rb, txn, prev_lsn);
2637 stream_started = false;
2638 }
2639 }
2640 else
2641 {
2642 /*
2643 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2644 * regular ones).
2645 */
2646 if (rbtxn_is_prepared(txn))
2647 {
2649 rb->prepare(rb, txn, commit_lsn);
2651 }
2652 else
2653 rb->commit(rb, txn, commit_lsn);
2654 }
2655
2656 /* this is just a sanity check against bad output plugin behaviour */
2658 elog(ERROR, "output plugin used XID %u",
2660
2661 /*
2662 * Remember the command ID and snapshot for the next set of changes in
2663 * streaming mode.
2664 */
2665 if (streaming)
2666 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2667 else if (snapshot_now->copied)
2668 ReorderBufferFreeSnap(rb, snapshot_now);
2669
2670 /* cleanup */
2672
2673 /*
2674 * Aborting the current (sub-)transaction as a whole has the right
2675 * semantics. We want all locks acquired in here to be released, not
2676 * reassigned to the parent and we do not want any database access
2677 * have persistent effects.
2678 */
2680
2681 /* make sure there's no cache pollution */
2683 {
2686 }
2687 else
2688 {
2692 }
2693
2694 if (using_subtxn)
2695 {
2698 CurrentResourceOwner = cowner;
2699 }
2700
2701 /*
2702 * We are here due to one of the four reasons: 1. Decoding an
2703 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2704 * prepared txn that was (partially) streamed. 4. Decoding a committed
2705 * txn.
2706 *
2707 * For 1, we allow truncation of txn data by removing the changes
2708 * already streamed but still keeping other things like invalidations,
2709 * snapshot, and tuplecids. For 2 and 3, we indicate
2710 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2711 * data as the entire transaction has been decoded except for commit.
2712 * For 4, as the entire txn has been decoded, we can fully clean up
2713 * the TXN reorder buffer.
2714 */
2715 if (streaming || rbtxn_is_prepared(txn))
2716 {
2717 if (streaming)
2719
2721 /* Reset the CheckXidAlive */
2723 }
2724 else
2726 }
2727 PG_CATCH();
2728 {
2731
2732 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2733 if (iterstate)
2735
2737
2738 /*
2739 * Force cache invalidation to happen outside of a valid transaction
2740 * to prevent catalog access as we just caught an error.
2741 */
2743
2744 /* make sure there's no cache pollution */
2746 {
2749 }
2750 else
2751 {
2755 }
2756
2757 if (using_subtxn)
2758 {
2761 CurrentResourceOwner = cowner;
2762 }
2763
2764 /*
2765 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2766 * abort of the (sub)transaction we are streaming or preparing. We
2767 * need to do the cleanup and return gracefully on this error, see
2768 * SetupCheckXidLive.
2769 *
2770 * This error code can be thrown by one of the callbacks we call
2771 * during decoding so we need to ensure that we return gracefully only
2772 * when we are sending the data in streaming mode and the streaming is
2773 * not finished yet or when we are sending the data out on a PREPARE
2774 * during a two-phase commit.
2775 */
2776 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2778 {
2779 /* curtxn must be set for streaming or prepared transactions */
2780 Assert(curtxn);
2781
2782 /* Cleanup the temporary error state. */
2785 errdata = NULL;
2786
2787 /* Remember the transaction is aborted. */
2789 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2790
2791 /* Mark the transaction is streamed if appropriate */
2792 if (stream_started)
2794
2795 /* Reset the TXN so that it is allowed to stream remaining data. */
2796 ReorderBufferResetTXN(rb, txn, snapshot_now,
2797 command_id, prev_lsn,
2798 specinsert);
2799 }
2800 else
2801 {
2804 PG_RE_THROW();
2805 }
2806 }
2807 PG_END_TRY();
2808}

References AbortCurrentTransaction(), ReorderBufferChange::action, Assert, BeginInternalSubTransaction(), CHANGES_THRESHOLD, CHECK_FOR_INTERRUPTS, CheckXidAlive, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::command_id, SnapshotData::copied, CopyErrorData(), SnapshotData::curcid, CurrentMemoryContext, CurrentResourceOwner, ReorderBufferChange::data, data, dlist_delete(), elog, ERROR, fb(), FlushErrorState(), FreeErrorData(), GetCurrentTransactionId(), GetCurrentTransactionIdIfAny(), i, ReorderBufferChange::inval, InvalidateSystemCaches(), ReorderBufferChange::invalidations, ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidCommandId, InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsToastRelation(), IsTransactionOrTransactionBlock(), ReorderBufferChange::lsn, MAIN_FORKNUM, MemoryContextSwitchTo(), ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::ninvalidations_distributed, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::origin_id, ReorderBufferTXN::origin_id, palloc0_array, PG_CATCH, PG_END_TRY, pg_fallthrough, PG_RE_THROW, PG_TRY, rbtxn_distr_inval_overflowed, RBTXN_IS_ABORTED, rbtxn_is_committed, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, RelationData::rd_rel, RelationClose(), RelationIdGetRelation(), RelationIsLogicallyLogged, RelationIsValid, RelidByRelfilenumber(), ReorderBufferChange::relids, RelFileLocator::relNumber, relpathperm, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, 
REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferApplyChange(), ReorderBufferApplyMessage(), ReorderBufferApplyTruncate(), ReorderBufferBuildTupleCidHash(), ReorderBufferCleanupTXN(), ReorderBufferCopySnap(), ReorderBufferExecuteInvalidations(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNInit(), ReorderBufferIterTXNNext(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferResetTXN(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastAppendChunk(), ReorderBufferToastReplace(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferChange::rlocator, RollbackAndReleaseCurrentSubTransaction(), SetupCheckXidLive(), SetupHistoricSnapshot(), ReorderBufferChange::snapshot, RelFileLocator::spcOid, StartTransactionCommand(), TeardownHistoricSnapshot(), ReorderBufferTXN::total_size, ReorderBufferChange::tp, ReorderBufferChange::truncate, ReorderBufferTXN::tuplecid_hash, ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by ReorderBufferReplay(), and ReorderBufferStreamTXN().

◆ ReorderBufferProcessXid()

void ReorderBufferProcessXid ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3293 of file reorderbuffer.c.

/*
 * Body of ReorderBufferProcessXid(rb, xid, lsn): for a valid xid, call
 * ReorderBufferTXNByXid() for that xid at lsn and discard the result.
 * Nothing happens for InvalidTransactionId.
 * NOTE(review): the two `true` arguments presumably ask for a missing entry
 * to be created as a top-level transaction -- confirm against
 * ReorderBufferTXNByXid().
 */
3294{
3295 /* many records won't have an xid assigned, centralize check here */
3296 if (xid != InvalidTransactionId)
3297 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3298}

References fb(), InvalidTransactionId, and ReorderBufferTXNByXid().

Referenced by heap2_decode(), heap_decode(), LogicalDecodingProcessRecord(), logicalmsg_decode(), standby_decode(), xact_decode(), and xlog_decode().

◆ ReorderBufferQueueChange()

void ReorderBufferQueueChange ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
ReorderBufferChange change,
bool  toast_insert 
)

Definition at line 811 of file reorderbuffer.c.

/*
 * Body of ReorderBufferQueueChange(rb, xid, lsn, change, toast_insert):
 * attach change to the transaction looked up for xid, stamping it with lsn
 * and the owning txn, appending it to txn->changes and bumping the entry
 * counters.  If the transaction is already flagged as aborted, the change is
 * freed instead and nothing is queued.
 * NOTE(review): the embedded line numbers skip (839-843, 847, 853, 859-860,
 * 863, 866), so several statements of the function are missing from this
 * listing; the comments that precede those gaps describe code not shown.
 */
813{
814 ReorderBufferTXN *txn;
815
816 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
817
818 /*
819 * If we have detected that the transaction is aborted while streaming the
820 * previous changes or by checking its CLOG, there is no point in
821 * collecting further changes for it.
822 */
823 if (rbtxn_is_aborted(txn))
824 {
825 /*
826 * We don't need to update memory accounting for this change as we
827 * have not added it to the queue yet.
828 */
829 ReorderBufferFreeChange(rb, change, false);
830 return;
831 }
832
833 /*
834 * The changes that are sent downstream are considered streamable. We
835 * remember such transactions so that only those will later be considered
836 * for streaming.
837 */
 /* NOTE(review): the rest of this condition (lines 839-843) is not shown */
838 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
844 {
845 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
846
848 }
849
 /* record where the change came from and which transaction owns it */
850 change->lsn = lsn;
851 change->txn = txn;
852
 /* append to the transaction's change list; both counters track it */
854 dlist_push_tail(&txn->changes, &change->node);
855 txn->nentries++;
856 txn->nentries_mem++;
857
858 /* update memory accounting information */
861
862 /* process partial change */
864
865 /* check the memory limits and evict something if needed */
867}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_push_tail(), fb(), ReorderBufferChange::lsn, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_get_toptxn, RBTXN_HAS_STREAMABLE_CHANGE, rbtxn_is_aborted, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCheckMemoryLimit(), ReorderBufferFreeChange(), ReorderBufferProcessPartialChange(), ReorderBufferTXNByXid(), ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeSpecConfirm(), DecodeTruncate(), DecodeUpdate(), ReorderBufferAddNewCommandId(), ReorderBufferAddSnapshot(), ReorderBufferQueueInvalidations(), and ReorderBufferQueueMessage().

◆ ReorderBufferQueueInvalidations()

◆ ReorderBufferQueueMessage()

void ReorderBufferQueueMessage ( ReorderBuffer rb,
TransactionId  xid,
Snapshot  snap,
XLogRecPtr  lsn,
bool  transactional,
const char prefix,
Size  message_size,
const char message 
)

Definition at line 874 of file reorderbuffer.c.

/*
 * Body of ReorderBufferQueueMessage(rb, xid, snap, lsn, transactional,
 * prefix, message_size, message).
 *
 * Transactional messages are copied (prefix and payload) while rb->context
 * is the current memory context and queued through
 * ReorderBufferQueueChange().  Non-transactional messages are handed
 * straight to the rb->message callback under a historic snapshot set up
 * from snap.
 * NOTE(review): the embedded line numbers skip (884, 895-896, 923, 927), so
 * some statements -- including the allocation of `change` -- are missing
 * from this listing.
 */
878{
879 if (transactional)
880 {
881 MemoryContext oldcontext;
882 ReorderBufferChange *change;
883
885
886 /*
887 * We don't expect snapshots for transactional changes - we'll use the
888 * snapshot derived later during apply (unless the change gets
889 * skipped).
890 */
891 Assert(!snap);
892
 /* the copies made below are allocated while rb->context is current */
893 oldcontext = MemoryContextSwitchTo(rb->context);
894
 /* private copies of prefix and payload; the caller's buffers are not kept */
897 change->data.msg.prefix = pstrdup(prefix);
898 change->data.msg.message_size = message_size;
899 change->data.msg.message = palloc(message_size);
900 memcpy(change->data.msg.message, message, message_size);
901
902 ReorderBufferQueueChange(rb, xid, lsn, change, false);
903
904 MemoryContextSwitchTo(oldcontext);
905 }
906 else
907 {
908 ReorderBufferTXN *txn = NULL;
909 volatile Snapshot snapshot_now = snap;
910
911 /* Non-transactional changes require a valid snapshot. */
912 Assert(snapshot_now);
913
 /* txn stays NULL when the message carries no valid xid */
914 if (xid != InvalidTransactionId)
915 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
916
917 /* setup snapshot to allow catalog access */
918 SetupHistoricSnapshot(snapshot_now, NULL);
919 PG_TRY();
920 {
 /* `false` is passed in the callback's transactional slot */
921 rb->message(rb, txn, lsn, false, prefix, message_size, message);
922
924 }
925 PG_CATCH();
926 {
 /* NOTE(review): line 927 (not shown) precedes the re-throw */
928 PG_RE_THROW();
929 }
930 PG_END_TRY();
931 }
932}

References ReorderBufferChange::action, Assert, ReorderBufferChange::data, fb(), InvalidTransactionId, MemoryContextSwitchTo(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, palloc(), PG_CATCH, PG_END_TRY, PG_RE_THROW, PG_TRY, ReorderBufferChange::prefix, pstrdup(), REORDER_BUFFER_CHANGE_MESSAGE, ReorderBufferAllocChange(), ReorderBufferQueueChange(), ReorderBufferTXNByXid(), SetupHistoricSnapshot(), and TeardownHistoricSnapshot().

Referenced by logicalmsg_decode().

◆ ReorderBufferRememberPrepareInfo()

bool ReorderBufferRememberPrepareInfo ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  prepare_lsn,
XLogRecPtr  end_lsn,
TimestampTz  prepare_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2905 of file reorderbuffer.c.

/*
 * Body of ReorderBufferRememberPrepareInfo(rb, xid, prepare_lsn, end_lsn,
 * prepare_time, origin_id, origin_lsn): store the prepare-time details in
 * the transaction found for xid.
 *
 * Returns false when no transaction is found for xid, true otherwise.
 * NOTE(review): lines 2929-2930 are missing from this listing; they
 * presumably set the prepared flag described by the comment at 2928.
 */
2909{
2910 ReorderBufferTXN *txn;
2911
 /* lookup only: the result is NULL-checked just below */
2912 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2913
2914 /* unknown transaction, nothing to do */
2915 if (txn == NULL)
2916 return false;
2917
2918 /*
2919 * Remember the prepare information to be later used by commit prepared in
2920 * case we skip doing prepare.
2921 */
2922 txn->final_lsn = prepare_lsn;
2923 txn->end_lsn = end_lsn;
2924 txn->prepare_time = prepare_time;
2925 txn->origin_id = origin_id;
2926 txn->origin_lsn = origin_lsn;
2927
2928 /* Mark this transaction as a prepared transaction */
2931
2932 return true;
2933}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferReplay()

static void ReorderBufferReplay ( ReorderBufferTXN txn,
ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)
static

Definition at line 2821 of file reorderbuffer.c.

/*
 * Body of ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn,
 * commit_time, origin_id, origin_lsn): record the commit details in txn and
 * then replay it.
 *
 * Three outcomes are visible here: an already-streamed transaction returns
 * early after the streamed-commit step; a transaction without a base
 * snapshot returns early with nothing to decode; otherwise the changes are
 * processed via ReorderBufferProcessTXN() starting from the base snapshot
 * and FirstCommandId.
 * NOTE(review): lines 2846 and 2865 are missing from this listing; the
 * reference list for this function names ReorderBufferStreamCommit() and
 * ReorderBufferCleanupTXN(), which are presumably the calls made there.
 */
2826{
2827 Snapshot snapshot_now;
2828 CommandId command_id = FirstCommandId;
2829
2830 txn->final_lsn = commit_lsn;
2831 txn->end_lsn = end_lsn;
2832 txn->commit_time = commit_time;
2833 txn->origin_id = origin_id;
2834 txn->origin_lsn = origin_lsn;
2835
2836 /*
2837 * If the transaction was (partially) streamed, we need to commit it in a
2838 * 'streamed' way. That is, we first stream the remaining part of the
2839 * transaction, and then invoke stream_commit message.
2840 *
2841 * Called after everything (origin ID, LSN, ...) is stored in the
2842 * transaction to avoid passing that information directly.
2843 */
2844 if (rbtxn_is_streamed(txn))
2845 {
2847 return;
2848 }
2849
2850 /*
2851 * If this transaction has no snapshot, it didn't make any changes to the
2852 * database, so there's nothing to decode. Note that
2853 * ReorderBufferCommitChild will have transferred any snapshots from
2854 * subtransactions if there were any.
2855 */
2856 if (txn->base_snapshot == NULL)
2857 {
2858 Assert(txn->ninvalidations == 0);
2859
2860 /*
2861 * Removing this txn before a commit might result in the computation
2862 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2863 */
2864 if (!rbtxn_is_prepared(txn))
2866 return;
2867 }
2868
2869 snapshot_now = txn->base_snapshot;
2870
2871 /* Process and send the changes to output plugin. */
2872 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2873 command_id, false);
2874}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, FirstCommandId, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferProcessTXN(), and ReorderBufferStreamCommit().

Referenced by ReorderBufferCommit(), ReorderBufferFinishPrepared(), and ReorderBufferPrepare().

◆ ReorderBufferResetTXN()

static void ReorderBufferResetTXN ( ReorderBuffer rb,
ReorderBufferTXN txn,
Snapshot  snapshot_now,
CommandId  command_id,
XLogRecPtr  last_lsn,
ReorderBufferChange specinsert 
)
static

Definition at line 2166 of file reorderbuffer.c.

/*
 * Body of ReorderBufferResetTXN(rb, txn, snapshot_now, command_id, last_lsn,
 * specinsert): clean up after a streaming/prepare run so the transaction can
 * continue later.  For a streamed transaction the stream is stopped at
 * last_lsn and the snapshot and command ID are saved in txn.
 * NOTE(review): lines 2173, 2176 and 2181 are missing from this listing; the
 * comments before those gaps describe calls that are not shown.
 */
2171{
2172 /* Discard the changes that we just streamed */
2174
2175 /* Free all resources allocated for toast reconstruction */
2177
2178 /* Return the spec insert change if it is not NULL */
2179 if (specinsert != NULL)
2180 {
 /* only the local parameter copy is cleared here */
2182 specinsert = NULL;
2183 }
2184
2185 /*
2186 * For the streaming case, stop the stream and remember the command ID and
2187 * snapshot for the streaming run.
2188 */
2189 if (rbtxn_is_streamed(txn))
2190 {
2191 rb->stream_stop(rb, txn, last_lsn);
2192 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2193 }
2194
2195 /* All changes must be deallocated */
2196 Assert(txn->size == 0);
2197}

References Assert, fb(), rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferFreeChange(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::size.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferRestoreChange()

static void ReorderBufferRestoreChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
char data 
)
static

Definition at line 4685 of file reorderbuffer.c.

/*
 * Body of ReorderBufferRestoreChange(rb, txn, data): rebuild one in-memory
 * ReorderBufferChange from a serialized record starting at data.
 *
 * The fixed-size ReorderBufferChange is copied out of the leading
 * ReorderBufferDiskChange, data is advanced past that header, and the
 * variable-length payload is then restored according to change->action.
 * The finished change is appended to txn->changes and txn->nentries_mem is
 * incremented.
 * NOTE(review): the case labels and several statements are missing from this
 * listing (gaps in the embedded line numbers); the branches below are
 * identified by the change->data member they touch.
 */
4687{
4689 ReorderBufferChange *change;
4690
4691 ondisk = (ReorderBufferDiskChange *) data;
4692
4693 change = ReorderBufferAllocChange(rb);
4694
4695 /* copy static part */
4696 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4697
 /* data now points at the action-specific payload */
4698 data += sizeof(ReorderBufferDiskChange);
4699
4700 /* restore individual stuff */
4701 switch (change->action)
4702 {
4703 /* fall through these, they're all similar enough */
 /*
  * Tuple branch (data.tp).  The pointers copied from disk act only as
  * "tuple present" flags here; they are overwritten below.
  */
4708 if (change->data.tp.oldtuple)
4709 {
4710 uint32 tuplelen = ((HeapTuple) data)->t_len;
4711
4712 change->data.tp.oldtuple =
4714
4715 /* restore ->tuple */
4716 memcpy(change->data.tp.oldtuple, data,
4717 sizeof(HeapTupleData));
4718 data += sizeof(HeapTupleData);
4719
4720 /* reset t_data pointer into the new tuplebuf */
4721 change->data.tp.oldtuple->t_data =
4722 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4723
4724 /* restore tuple data itself */
4726 data += tuplelen;
4727 }
4728
4729 if (change->data.tp.newtuple)
4730 {
4731 /* here, data might not be suitably aligned! */
4733
4735 sizeof(uint32));
4736
4737 change->data.tp.newtuple =
4739
4740 /* restore ->tuple */
4741 memcpy(change->data.tp.newtuple, data,
4742 sizeof(HeapTupleData));
4743 data += sizeof(HeapTupleData);
4744
4745 /* reset t_data pointer into the new tuplebuf */
4746 change->data.tp.newtuple->t_data =
4747 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4748
4749 /* restore tuple data itself */
4751 data += tuplelen;
4752 }
4753
4754 break;
 /* Message branch (data.msg): size-prefixed prefix, then size-prefixed body */
4756 {
4757 Size prefix_size;
4758
4759 /* read prefix */
4760 memcpy(&prefix_size, data, sizeof(Size));
4761 data += sizeof(Size);
4762 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4763 prefix_size);
4764 memcpy(change->data.msg.prefix, data, prefix_size);
 /* prefix_size is expected to include the terminating NUL */
4765 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4766 data += prefix_size;
4767
4768 /* read the message */
4769 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4770 data += sizeof(Size);
4771 change->data.msg.message = MemoryContextAlloc(rb->context,
4772 change->data.msg.message_size);
4773 memcpy(change->data.msg.message, data,
4774 change->data.msg.message_size);
4775 data += change->data.msg.message_size;
4776
4777 break;
4778 }
 /* Invalidation branch (data.inval): a fresh array allocated in rb->context */
4780 {
4782 change->data.inval.ninvalidations;
4783
4784 change->data.inval.invalidations =
4785 MemoryContextAlloc(rb->context, inval_size);
4786
4787 /* read the message */
4789
4790 break;
4791 }
 /*
  * Snapshot branch (data.snapshot): the struct and its xid arrays are
  * restored as a single allocation.
  */
4793 {
4796 Size size;
4797
4798 oldsnap = (Snapshot) data;
4799
4800 size = sizeof(SnapshotData) +
4801 sizeof(TransactionId) * oldsnap->xcnt +
4802 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4803
4804 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4805
4806 newsnap = change->data.snapshot;
4807
4808 memcpy(newsnap, data, size);
 /* re-point xip/subxip into this allocation: xip follows the struct, subxip follows xip */
4809 newsnap->xip = (TransactionId *)
4810 (((char *) newsnap) + sizeof(SnapshotData));
4811 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4812 newsnap->copied = true;
4813 break;
4814 }
4815 /* the base struct contains all the data, easy peasy */
 /* Truncate branch (data.truncate): only the relid array needs restoring */
4817 {
4818 Oid *relids;
4819
4820 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4821 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4822 change->data.truncate.relids = relids;
4823
4824 break;
4825 }
4830 break;
4831 }
4832
 /* link the restored change into the transaction's in-memory list */
4833 dlist_push_tail(&txn->changes, &change->node);
4834 txn->nentries_mem++;
4835
4836 /*
4837 * Update memory accounting for the restored change. We need to do this
4838 * although we don't check the memory limit when restoring the changes in
4839 * this branch (we only do that when initially queueing the changes after
4840 * decoding), because we will release the changes later, and that will
4841 * update the accounting too (subtracting the size from the counters). And
4842 * we don't want to underflow there.
4843 */
4845 ReorderBufferChangeSize(change));
4846}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, ReorderBufferTXN::changes, ReorderBufferChange::data, data, dlist_push_tail(), fb(), HEAPTUPLESIZE, ReorderBufferChange::inval, ReorderBufferChange::invalidations, MemoryContextAlloc(), MemoryContextAllocZero(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferTXN::nentries_mem, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferAllocChange(), ReorderBufferAllocRelids(), ReorderBufferAllocTupleBuf(), ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SizeofHeapTupleHeader, ReorderBufferChange::snapshot, HeapTupleData::t_data, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferRestoreChanges().

◆ ReorderBufferRestoreChanges()

static Size ReorderBufferRestoreChanges ( ReorderBuffer rb,
ReorderBufferTXN txn,
TXNEntryFile file,
XLogSegNo segno 
)
static

Definition at line 4542 of file reorderbuffer.c.

/*
 * Body of ReorderBufferRestoreChanges(rb, txn, file, segno): refill txn's
 * in-memory change list from its spill files.
 *
 * The changes currently in memory are released first.  Records are then read
 * one at a time -- a fixed-size ReorderBufferDiskChange header followed by
 * the rest of the record -- and handed to ReorderBufferRestoreChange(),
 * until max_changes_in_memory records have been restored or *segno passes
 * last_segno.  A segment with no file (ENOENT) is skipped.  *segno,
 * file->vfd and file->curOffset carry the read position across calls.
 *
 * Returns the number of changes restored by this call.
 * NOTE(review): several declarations and statements are missing from this
 * listing (gaps in the embedded line numbers), including how last_segno is
 * computed and the file-open call.
 */
4544{
4545 Size restored = 0;
4548 File *fd = &file->vfd;
4549
4552
4553 /* free current entries, so we have memory for more */
4555 {
4558
4559 dlist_delete(&cleanup->node);
4561 }
4562 txn->nentries_mem = 0;
4564
4566
4567 while (restored < max_changes_in_memory && *segno <= last_segno)
4568 {
4569 int readBytes;
4571
4573
 /* no file open: move on to the file for segment *segno */
4574 if (*fd == -1)
4575 {
4576 char path[MAXPGPATH];
4577
4578 /* first time in */
4579 if (*segno == 0)
4580 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4581
4582 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4583
4584 /*
4585 * No need to care about TLIs here, only used during a single run,
4586 * so each LSN only maps to a specific WAL record.
4587 */
4589 *segno);
4590
4592
4593 /* No harm in resetting the offset even in case of failure */
4594 file->curOffset = 0;
4595
 /* a segment without a file is skipped, not treated as an error */
4596 if (*fd < 0 && errno == ENOENT)
4597 {
4598 *fd = -1;
4599 (*segno)++;
4600 continue;
4601 }
4602 else if (*fd < 0)
4603 ereport(ERROR,
4605 errmsg("could not open file \"%s\": %m",
4606 path)));
4607 }
4608
4609 /*
4610 * Read the statically sized part of a change which has information
4611 * about the total size. If we couldn't read a record, we're at the
4612 * end of this file.
4613 */
4615 readBytes = FileRead(file->vfd, rb->outbuf,
4618
4619 /* eof */
4620 if (readBytes == 0)
4621 {
4622 FileClose(*fd);
4623 *fd = -1;
4624 (*segno)++;
4625 continue;
4626 }
4627 else if (readBytes < 0)
4628 ereport(ERROR,
4630 errmsg("could not read from reorderbuffer spill file: %m")));
4631 else if (readBytes != sizeof(ReorderBufferDiskChange))
4632 ereport(ERROR,
4634 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4635 readBytes,
4636 (uint32) sizeof(ReorderBufferDiskChange))));
4637
4638 file->curOffset += readBytes;
4639
4640 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4641
4643 sizeof(ReorderBufferDiskChange) + ondisk->size);
 /* re-read the pointer: rb->outbuf may have been reallocated */
4644 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4645
 /* ondisk->size covers the whole record, so read the remainder after the header */
4646 readBytes = FileRead(file->vfd,
4647 rb->outbuf + sizeof(ReorderBufferDiskChange),
4648 ondisk->size - sizeof(ReorderBufferDiskChange),
4649 file->curOffset,
4651
4652 if (readBytes < 0)
4653 ereport(ERROR,
4655 errmsg("could not read from reorderbuffer spill file: %m")));
4656 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4657 ereport(ERROR,
4659 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4660 readBytes,
4661 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4662
4663 file->curOffset += readBytes;
4664
4665 /*
4666 * ok, read a full change from disk, now restore it into proper
4667 * in-memory format
4668 */
4669 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4670 restored++;
4671 }
4672
4673 return restored;
4674}

References Assert, ReorderBufferTXN::changes, CHECK_FOR_INTERRUPTS, cleanup(), TXNEntryFile::curOffset, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_is_empty(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), FileClose(), FileRead(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, max_changes_in_memory, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, PathNameOpenFile(), PG_BINARY, ReorderBufferFreeChange(), ReorderBufferRestoreChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, TXNEntryFile::vfd, wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit(), and ReorderBufferIterTXNNext().

◆ ReorderBufferRestoreCleanup()

static void ReorderBufferRestoreCleanup ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 4852 of file reorderbuffer.c.

/*
 * Body of ReorderBufferRestoreCleanup(rb, txn): unlink one file per segment
 * number in the inclusive range [first, last].  A file that does not exist
 * (ENOENT) is tolerated; any other unlink() failure raises ERROR.
 * NOTE(review): the statements that compute first/last and build path are
 * missing from this listing (lines 4858-4862, 4869); the reference list
 * suggests they derive from txn->first_lsn / txn->final_lsn via XLByteToSeg
 * and ReorderBufferSerializedPath() -- confirm.
 */
4853{
4854 XLogSegNo first;
4855 XLogSegNo cur;
4856 XLogSegNo last;
4857
4860
4863
4864 /* iterate over all possible filenames, and delete them */
4865 for (cur = first; cur <= last; cur++)
4866 {
4867 char path[MAXPGPATH];
4868
 /* a segment in the range may have no file; only other failures are errors */
4870 if (unlink(path) != 0 && errno != ENOENT)
4871 ereport(ERROR,
4873 errmsg("could not remove file \"%s\": %m", path)));
4874 }
4875}

References Assert, cur, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferSerializedPath(), wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferCleanupTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferSaveTXNSnapshot()

static void ReorderBufferSaveTXNSnapshot ( ReorderBuffer rb,
ReorderBufferTXN txn,
Snapshot  snapshot_now,
CommandId  command_id 
)
inlinestatic

Definition at line 2121 of file reorderbuffer.c.

/*
 * Body of ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id):
 * store the command ID and snapshot in txn->command_id and
 * txn->snapshot_now.
 */
2123{
2124 txn->command_id = command_id;
2125
2126 /*
 * A snapshot already flagged as copied is stored as-is; otherwise the
 * result of ReorderBufferCopySnap() is stored.
 */
2127 if (snapshot_now->copied)
2128 txn->snapshot_now = snapshot_now;
2129 else
2130 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2131 txn, command_id);
2132}

References ReorderBufferTXN::command_id, SnapshotData::copied, fb(), ReorderBufferCopySnap(), and ReorderBufferTXN::snapshot_now.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferSerializeChange()

static void ReorderBufferSerializeChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
int  fd,
ReorderBufferChange change 
)
static

Definition at line 4090 of file reorderbuffer.c.

/*
 * Body of ReorderBufferSerializeChange(rb, txn, fd, change): write one
 * change to the file descriptor fd.
 *
 * The on-disk image is built in rb->outbuf: a ReorderBufferDiskChange header
 * holding a copy of the ReorderBufferChange, followed by an action-specific
 * payload.  sz accumulates the total record size and is stored in
 * ondisk->size before the single write() call.  A short or failed write
 * raises ERROR.  Afterwards txn->final_lsn is advanced to change->lsn if
 * that is later.
 * NOTE(review): the case labels, the declaration and initial value of sz,
 * and the buffer-reservation calls are missing from this listing (gaps in
 * the embedded line numbers); the branches below are identified by the
 * change->data member they touch.
 */
4092{
4095
4097
4098 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4099 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4100
4101 switch (change->action)
4102 {
4103 /* fall through these, they're all similar enough */
 /* Tuple branch (data.tp): each present tuple is a HeapTupleData plus t_len data bytes */
4108 {
4109 char *data;
4111 newtup;
4112 Size oldlen = 0;
4113 Size newlen = 0;
4114
4115 oldtup = change->data.tp.oldtuple;
4116 newtup = change->data.tp.newtuple;
4117
4118 if (oldtup)
4119 {
4120 sz += sizeof(HeapTupleData);
4121 oldlen = oldtup->t_len;
4122 sz += oldlen;
4123 }
4124
4125 if (newtup)
4126 {
4127 sz += sizeof(HeapTupleData);
4128 newlen = newtup->t_len;
4129 sz += newlen;
4130 }
4131
4132 /* make sure we have enough space */
4134
4135 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4136 /* might have been reallocated above */
4137 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4138
 /* note: the copy is gated on the length, so a zero-length tuple is not written */
4139 if (oldlen)
4140 {
4141 memcpy(data, oldtup, sizeof(HeapTupleData));
4142 data += sizeof(HeapTupleData);
4143
4144 memcpy(data, oldtup->t_data, oldlen);
4145 data += oldlen;
4146 }
4147
4148 if (newlen)
4149 {
4150 memcpy(data, newtup, sizeof(HeapTupleData));
4151 data += sizeof(HeapTupleData);
4152
4153 memcpy(data, newtup->t_data, newlen);
4154 data += newlen;
4155 }
4156 break;
4157 }
 /* Message branch (data.msg): size-prefixed prefix, then size-prefixed body */
4159 {
4160 char *data;
 /* the stored prefix length includes the terminating NUL */
4161 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4162
4163 sz += prefix_size + change->data.msg.message_size +
4164 sizeof(Size) + sizeof(Size);
4166
4167 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4168
4169 /* might have been reallocated above */
4170 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4171
4172 /* write the prefix including the size */
4173 memcpy(data, &prefix_size, sizeof(Size));
4174 data += sizeof(Size);
4175 memcpy(data, change->data.msg.prefix,
4176 prefix_size);
4177 data += prefix_size;
4178
4179 /* write the message including the size */
4180 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4181 data += sizeof(Size);
4182 memcpy(data, change->data.msg.message,
4183 change->data.msg.message_size);
4184 data += change->data.msg.message_size;
4185
4186 break;
4187 }
 /* Invalidation branch (data.inval): payload is inval_size bytes */
4189 {
4190 char *data;
4192 change->data.inval.ninvalidations;
4193
4194 sz += inval_size;
4195
4197 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4198
4199 /* might have been reallocated above */
4200 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4202 data += inval_size;
4203
4204 break;
4205 }
 /* Snapshot branch (data.snapshot): struct, then xip[], then subxip[] */
4207 {
4208 Snapshot snap;
4209 char *data;
4210
4211 snap = change->data.snapshot;
4212
4213 sz += sizeof(SnapshotData) +
4214 sizeof(TransactionId) * snap->xcnt +
4215 sizeof(TransactionId) * snap->subxcnt;
4216
4217 /* make sure we have enough space */
4219 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4220 /* might have been reallocated above */
4221 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4222
4223 memcpy(data, snap, sizeof(SnapshotData));
4224 data += sizeof(SnapshotData);
4225
4226 if (snap->xcnt)
4227 {
4228 memcpy(data, snap->xip,
4229 sizeof(TransactionId) * snap->xcnt);
4230 data += sizeof(TransactionId) * snap->xcnt;
4231 }
4232
4233 if (snap->subxcnt)
4234 {
4235 memcpy(data, snap->subxip,
4236 sizeof(TransactionId) * snap->subxcnt);
4237 data += sizeof(TransactionId) * snap->subxcnt;
4238 }
4239 break;
4240 }
 /* Truncate branch (data.truncate): payload is the relid array */
4242 {
4243 Size size;
4244 char *data;
4245
4246 /* account for the OIDs of truncated relations */
4247 size = sizeof(Oid) * change->data.truncate.nrelids;
4248 sz += size;
4249
4250 /* make sure we have enough space */
4252
4253 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4254 /* might have been reallocated above */
4255 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4256
4257 memcpy(data, change->data.truncate.relids, size);
4258 data += size;
4259
4260 break;
4261 }
4266 /* ReorderBufferChange contains everything important */
4267 break;
4268 }
4269
 /* total record length, header included */
4270 ondisk->size = sz;
4271
 /* clear errno first so a short write that leaves it unset can be detected */
4272 errno = 0;
4274 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4275 {
 /* keep write()'s errno before any further call can overwrite it */
4276 int save_errno = errno;
4277
4279
4280 /* if write didn't set errno, assume problem is no disk space */
4282 ereport(ERROR,
4284 errmsg("could not write to data file for XID %u: %m",
4285 txn->xid)));
4286 }
4288
4289 /*
4290 * Keep the transaction's final_lsn up to date with each change we send to
4291 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4292 * only do this on commit and abort records, but that doesn't work if a
4293 * system crash leaves a transaction without its abort record).
4294 *
4295 * Make sure not to move it backwards.
4296 */
4297 if (txn->final_lsn < change->lsn)
4298 txn->final_lsn = change->lsn;
4299
4300 Assert(ondisk->change.action == change->action);
4301}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, CloseTransientFile(), ReorderBufferChange::data, data, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), ReorderBufferTXN::final_lsn, ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, pgstat_report_wait_end(), pgstat_report_wait_start(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, write, SnapshotData::xcnt, and ReorderBufferTXN::xid.

Referenced by ReorderBufferSerializeTXN().

◆ ReorderBufferSerializedPath()

◆ ReorderBufferSerializeReserve()

static void ReorderBufferSerializeReserve ( ReorderBuffer rb,
Size  sz 
)
static

Definition at line 3770 of file reorderbuffer.c.

/*
 * Make sure rb->outbuf is at least sz bytes large.
 *
 * rb->outbufsize == 0 is treated as "nothing allocated yet": the buffer is
 * then allocated in rb->context.  If a buffer exists but is smaller than sz
 * it is enlarged with repalloc().  A buffer that is already big enough is
 * left untouched, so the buffer only ever grows.  rb->outbufsize is updated
 * to sz whenever the buffer is (re)allocated.
 *
 * NOTE(review): with rb->outbufsize == 0, a request for sz == 0 takes the
 * first branch and leaves outbufsize at 0, so a later call would allocate
 * again without releasing the earlier chunk.  Callers presumably always
 * pass a nonzero size -- confirm.
 */
3771{
3772 if (!rb->outbufsize)
3773 {
3774 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3775 rb->outbufsize = sz;
3776 }
3777 else if (rb->outbufsize < sz)
3778 {
3779 rb->outbuf = repalloc(rb->outbuf, sz);
3780 rb->outbufsize = sz;
3781 }
3782}

References fb(), MemoryContextAlloc(), and repalloc().

Referenced by ReorderBufferRestoreChanges(), and ReorderBufferSerializeChange().

◆ ReorderBufferSerializeTXN()

static void ReorderBufferSerializeTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 3995 of file reorderbuffer.c.

3996{
3999 int fd = -1;
4001 Size spilled = 0;
4002 Size size = txn->size;
4003
4004 elog(DEBUG2, "spill %u changes in XID %u to disk",
4005 (uint32) txn->nentries_mem, txn->xid);
4006
4007 /* do the same to all child TXs */
4009 {
4011
4014 }
4015
4016 /* serialize changestream */
4018 {
4019 ReorderBufferChange *change;
4020
4021 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4022
4023 /*
4024 * store in segment in which it belongs by start lsn, don't split over
4025 * multiple segments though
4026 */
4027 if (fd == -1 ||
4029 {
4030 char path[MAXPGPATH];
4031
4032 if (fd != -1)
4034
4036
4037 /*
4038 * No need to care about TLIs here, only used during a single run,
4039 * so each LSN only maps to a specific WAL record.
4040 */
4042 curOpenSegNo);
4043
4044 /* open segment, create it if necessary */
4045 fd = OpenTransientFile(path,
4047
4048 if (fd < 0)
4049 ereport(ERROR,
4051 errmsg("could not open file \"%s\": %m", path)));
4052 }
4053
4054 ReorderBufferSerializeChange(rb, txn, fd, change);
4055 dlist_delete(&change->node);
4056 ReorderBufferFreeChange(rb, change, false);
4057
4058 spilled++;
4059 }
4060
4061 /* Update the memory counter */
4062 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4063
4064 /* update the statistics iff we have spilled anything */
4065 if (spilled)
4066 {
4067 rb->spillCount += 1;
4068 rb->spillBytes += size;
4069
4070 /* don't consider already serialized transactions */
4071 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4072
4073 /* update the decoding stats */
4075 }
4076
4077 Assert(spilled == txn->nentries_mem);
4079 txn->nentries_mem = 0;
4081
4082 if (fd != -1)
4084}

References Assert, ReorderBufferTXN::changes, CloseTransientFile(), DEBUG2, dlist_container, dlist_delete(), dlist_foreach, dlist_foreach_modify, dlist_is_empty(), elog, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), ReorderBufferChange::lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, OpenTransientFile(), PG_BINARY, RBTXN_IS_SERIALIZED, rbtxn_is_serialized, rbtxn_is_serialized_clear, ReorderBufferChangeMemoryUpdate(), ReorderBufferFreeChange(), ReorderBufferSerializeChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReorderBufferTXN::size, ReorderBufferTXN::subtxns, ReorderBufferTXN::txn_flags, UpdateDecodingStats(), wal_segment_size, ReorderBufferTXN::xid, XLByteInSeg, and XLByteToSeg.

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferIterTXNInit(), and ReorderBufferSerializeTXN().

◆ ReorderBufferSetBaseSnapshot()

void ReorderBufferSetBaseSnapshot ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Snapshot  snap 
)

Definition at line 3324 of file reorderbuffer.c.

3326{
3327 ReorderBufferTXN *txn;
3328 bool is_new;
3329
3330 Assert(snap != NULL);
3331
3332 /*
3333 * Fetch the transaction to operate on. If we know it's a subtransaction,
3334 * operate on its top-level transaction instead.
3335 */
3336 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3337 if (rbtxn_is_known_subxact(txn))
3338 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3339 NULL, InvalidXLogRecPtr, false);
3340 Assert(txn->base_snapshot == NULL);
3341
3342 txn->base_snapshot = snap;
3343 txn->base_snapshot_lsn = lsn;
3344 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3345
3347}

References Assert, AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_push_tail(), fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), and SnapBuildProcessChange().

◆ ReorderBufferSetRestartPoint()

void ReorderBufferSetRestartPoint ( ReorderBuffer rb,
XLogRecPtr  ptr 
)

Definition at line 1088 of file reorderbuffer.c.

/*
 * Remember ptr as the reorder buffer's current restart-decoding LSN.
 *
 * The value is only stored here; ReorderBufferTXNByXid() copies
 * rb->current_restart_decoding_lsn into restart_decoding_lsn of every
 * transaction entry it creates afterwards.
 */
1089{
1090 rb->current_restart_decoding_lsn = ptr;
1091}

References fb().

Referenced by SnapBuildRestore(), and SnapBuildSerialize().

◆ ReorderBufferSkipPrepare()

void ReorderBufferSkipPrepare ( ReorderBuffer rb,
TransactionId  xid 
)

Definition at line 2937 of file reorderbuffer.c.

2938{
2939 ReorderBufferTXN *txn;
2940
2941 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2942
2943 /* unknown transaction, nothing to do */
2944 if (txn == NULL)
2945 return;
2946
2947 /* txn must have been marked as a prepared transaction */
2950}

References Assert, fb(), InvalidXLogRecPtr, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferStreamCommit()

static void ReorderBufferStreamCommit ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1984 of file reorderbuffer.c.

1985{
1986 /* we should only call this for previously streamed transactions */
1988
1990
1991 if (rbtxn_is_prepared(txn))
1992 {
1993 /*
1994 * Note, we send stream prepare even if a concurrent abort is
1995 * detected. See DecodePrepare for more information.
1996 */
1998 rb->stream_prepare(rb, txn, txn->final_lsn);
2000
2001 /*
2002 * This is a PREPARED transaction, part of a two-phase commit. The
2003 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2004 * just truncate txn by removing changes and tuplecids.
2005 */
2006 ReorderBufferTruncateTXN(rb, txn, true);
2007 /* Reset the CheckXidAlive */
2009 }
2010 else
2011 {
2012 rb->stream_commit(rb, txn, txn->final_lsn);
2014 }
2015}

References Assert, CheckXidAlive, fb(), ReorderBufferTXN::final_lsn, InvalidTransactionId, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferCleanupTXN(), ReorderBufferStreamTXN(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferReplay().

◆ ReorderBufferStreamTXN()

static void ReorderBufferStreamTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 4340 of file reorderbuffer.c.

4341{
4342 Snapshot snapshot_now;
4343 CommandId command_id;
4344 Size stream_bytes;
4345 bool txn_is_streamed;
4346
4347 /* We can never reach here for a subtransaction. */
4348 Assert(rbtxn_is_toptxn(txn));
4349
4350 /*
4351 * We can't make any assumptions about base snapshot here, similar to what
4352 * ReorderBufferCommit() does. That relies on base_snapshot getting
4353 * transferred from subxact in ReorderBufferCommitChild(), but that was
4354 * not yet called as the transaction is in-progress.
4355 *
4356 * So just walk the subxacts and use the same logic here. But we only need
4357 * to do that once, when the transaction is streamed for the first time.
4358 * After that we need to reuse the snapshot from the previous run.
4359 *
4360 * Unlike DecodeCommit which adds xids of all the subtransactions in
4361 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4362 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4363 * allows the catalog changes made in subtransactions decoded till now to
4364 * be visible.
4365 */
4366 if (txn->snapshot_now == NULL)
4367 {
4369
4370 /* make sure this transaction is streamed for the first time */
4372
4373 /* at the beginning we should have invalid command ID */
4375
4377 {
4379
4382 }
4383
4384 /*
4385 * If this transaction has no snapshot, it didn't make any changes to
4386 * the database till now, so there's nothing to decode.
4387 */
4388 if (txn->base_snapshot == NULL)
4389 {
4390 Assert(txn->ninvalidations == 0);
4391 return;
4392 }
4393
4394 command_id = FirstCommandId;
4395 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4396 txn, command_id);
4397 }
4398 else
4399 {
4400 /* the transaction must have been already streamed */
4402
4403 /*
4404 * No, we already have a snapshot from the previous streaming run. We
4405 * assume new subxacts can't move the LSN backwards, and so can't beat
4406 * the LSN condition in the previous branch (so no need to walk
4407 * through subxacts again). In fact, we must not do that as we may be
4408 * using snapshot half-way through the subxact.
4409 */
4410 command_id = txn->command_id;
4411
4412 /*
4413 * We can't use txn->snapshot_now directly because after the last
4414 * streaming run, we might have got some new sub-transactions. So we
4415 * need to add them to the snapshot.
4416 */
4417 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4418 txn, command_id);
4419
4420 /* Free the previously copied snapshot. */
4421 Assert(txn->snapshot_now->copied);
4423 txn->snapshot_now = NULL;
4424 }
4425
4426 /*
4427 * Remember this information to be used later to update stats. We can't
4428 * update the stats here as an error while processing the changes would
4429 * lead to the accumulation of stats even though we haven't streamed all
4430 * the changes.
4431 */
4433 stream_bytes = txn->total_size;
4434
4435 /* Process and send the changes to output plugin. */
4436 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4437 command_id, true);
4438
4439 rb->streamCount += 1;
4440 rb->streamBytes += stream_bytes;
4441
4442 /* Don't consider already streamed transaction. */
4443 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4444
4445 /* update the decoding stats */
4447
4449 Assert(txn->nentries == 0);
4450 Assert(txn->nentries_mem == 0);
4451}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::changes, ReorderBufferTXN::command_id, SnapshotData::copied, dlist_container, dlist_foreach, dlist_is_empty(), fb(), FirstCommandId, InvalidCommandId, InvalidXLogRecPtr, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, rbtxn_is_toptxn, ReorderBufferCopySnap(), ReorderBufferFreeSnap(), ReorderBufferProcessTXN(), ReorderBufferTransferSnapToParent(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferProcessPartialChange(), and ReorderBufferStreamCommit().

◆ ReorderBufferToastAppendChunk()

static void ReorderBufferToastAppendChunk ( ReorderBuffer rb,
ReorderBufferTXN txn,
Relation  relation,
ReorderBufferChange change 
)
static

Definition at line 4993 of file reorderbuffer.c.

4995{
4998 bool found;
5000 bool isnull;
5001 Pointer chunk;
5002 TupleDesc desc = RelationGetDescr(relation);
5003 Oid chunk_id;
5005
5006 if (txn->toast_hash == NULL)
5008
5009 Assert(IsToastRelation(relation));
5010
5011 newtup = change->data.tp.newtuple;
5012 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5013 Assert(!isnull);
5014 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5015 Assert(!isnull);
5016
5018 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5019
5020 if (!found)
5021 {
5022 Assert(ent->chunk_id == chunk_id);
5023 ent->num_chunks = 0;
5024 ent->last_chunk_seq = 0;
5025 ent->size = 0;
5026 ent->reconstructed = NULL;
5027 dlist_init(&ent->chunks);
5028
5029 if (chunk_seq != 0)
5030 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5031 chunk_seq, chunk_id);
5032 }
5033 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5034 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5035 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5036
5037 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5038 Assert(!isnull);
5039
5040 /* calculate size so we can allocate the right size at once later */
5041 if (!VARATT_IS_EXTENDED(chunk))
5042 chunksize = VARSIZE(chunk) - VARHDRSZ;
5043 else if (VARATT_IS_SHORT(chunk))
5044 /* could happen due to heap_form_tuple doing its thing */
5046 else
5047 elog(ERROR, "unexpected type of toast chunk");
5048
5049 ent->size += chunksize;
5050 ent->last_chunk_seq = chunk_seq;
5051 ent->num_chunks++;
5052 dlist_push_tail(&ent->chunks, &change->node);
5053}

References Assert, ReorderBufferChange::data, DatumGetInt32(), DatumGetObjectId(), DatumGetPointer(), dlist_init(), dlist_push_tail(), elog, ERROR, fastgetattr(), fb(), HASH_ENTER, hash_search(), IsToastRelation(), ReorderBufferChange::newtuple, ReorderBufferChange::node, RelationGetDescr, ReorderBufferToastInitHash(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, VARATT_IS_EXTENDED(), VARATT_IS_SHORT(), VARHDRSZ, VARHDRSZ_SHORT, VARSIZE(), and VARSIZE_SHORT().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastInitHash()

static void ReorderBufferToastInitHash ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 4973 of file reorderbuffer.c.

4974{
4976
4977 Assert(txn->toast_hash == NULL);
4978
4979 hash_ctl.keysize = sizeof(Oid);
4980 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4981 hash_ctl.hcxt = rb->context;
4982 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4984}

References Assert, fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferToastAppendChunk().

◆ ReorderBufferToastReplace()

static void ReorderBufferToastReplace ( ReorderBuffer rb,
ReorderBufferTXN txn,
Relation  relation,
ReorderBufferChange change 
)
static

Definition at line 5076 of file reorderbuffer.c.

5078{
5079 TupleDesc desc;
5080 int natt;
5081 Datum *attrs;
5082 bool *isnull;
5083 bool *free;
5085 Relation toast_rel;
5087 MemoryContext oldcontext;
5089 Size old_size;
5090
5091 /* no toast tuples changed */
5092 if (txn->toast_hash == NULL)
5093 return;
5094
5095 /*
5096 * We're going to modify the size of the change. So, to make sure the
5097 * accounting is correct we record the current change size and then after
5098 * re-computing the change we'll subtract the recorded size and then
5099 * re-add the new change size at the end. We don't immediately subtract
5100 * the old size because if there is any error before we add the new size,
5101 * we will release the changes and that will update the accounting info
5102 * (subtracting the size from the counters). And we don't want to
5103 * underflow there.
5104 */
5106
5107 oldcontext = MemoryContextSwitchTo(rb->context);
5108
5109 /* we should only have toast tuples in an INSERT or UPDATE */
5110 Assert(change->data.tp.newtuple);
5111
5112 desc = RelationGetDescr(relation);
5113
5114 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5115 if (!RelationIsValid(toast_rel))
5116 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5117 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5118
5119 toast_desc = RelationGetDescr(toast_rel);
5120
5121 /* should we allocate from stack instead? */
5122 attrs = palloc0_array(Datum, desc->natts);
5123 isnull = palloc0_array(bool, desc->natts);
5124 free = palloc0_array(bool, desc->natts);
5125
5126 newtup = change->data.tp.newtuple;
5127
5128 heap_deform_tuple(newtup, desc, attrs, isnull);
5129
5130 for (natt = 0; natt < desc->natts; natt++)
5131 {
5135
5136 /* va_rawsize is the size of the original datum -- including header */
5137 varatt_external toast_pointer;
5140 varlena *reconstructed;
5141 dlist_iter it;
5142 Size data_done = 0;
5143
5144 if (attr->attisdropped)
5145 continue;
5146
5147 /* not a varlena datatype */
5148 if (attr->attlen != -1)
5149 continue;
5150
5151 /* no data */
5152 if (isnull[natt])
5153 continue;
5154
5155 /* ok, we know we have a toast datum */
5157
5158 /* no need to do anything if the tuple isn't external */
5160 continue;
5161
5163
5164 /*
5165 * Check whether the toast tuple changed, replace if so.
5166 */
5169 &toast_pointer.va_valueid,
5170 HASH_FIND,
5171 NULL);
5172 if (ent == NULL)
5173 continue;
5174
5175 new_datum =
5177
5178 free[natt] = true;
5179
5180 reconstructed = palloc0(toast_pointer.va_rawsize);
5181
5182 ent->reconstructed = reconstructed;
5183
5184 /* stitch toast tuple back together from its parts */
5185 dlist_foreach(it, &ent->chunks)
5186 {
5187 bool cisnull;
5190 Pointer chunk;
5191
5193 ctup = cchange->data.tp.newtuple;
5195
5196 Assert(!cisnull);
5197 Assert(!VARATT_IS_EXTERNAL(chunk));
5198 Assert(!VARATT_IS_SHORT(chunk));
5199
5200 memcpy(VARDATA(reconstructed) + data_done,
5201 VARDATA(chunk),
5202 VARSIZE(chunk) - VARHDRSZ);
5203 data_done += VARSIZE(chunk) - VARHDRSZ;
5204 }
5205 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5206
5207 /* make sure it's marked as compressed or not */
5208 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5209 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5210 else
5211 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5212
5214 redirect_pointer.pointer = reconstructed;
5215
5218 sizeof(redirect_pointer));
5219
5221 }
5222
5223 /*
5224 * Build tuple in separate memory & copy tuple back into the tuplebuf
5225 * passed to the output plugin. We can't directly heap_fill_tuple() into
5226 * the tuplebuf because attrs[] will point back into the current content.
5227 */
5228 tmphtup = heap_form_tuple(desc, attrs, isnull);
5229 Assert(newtup->t_len <= MaxHeapTupleSize);
5230 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5231
5232 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5233 newtup->t_len = tmphtup->t_len;
5234
5235 /*
5236 * free resources we won't further need, more persistent stuff will be
5237 * free'd in ReorderBufferToastReset().
5238 */
5239 RelationClose(toast_rel);
5240 pfree(tmphtup);
5241 for (natt = 0; natt < desc->natts; natt++)
5242 {
5243 if (free[natt])
5245 }
5246 pfree(attrs);
5247 pfree(free);
5248 pfree(isnull);
5249
5250 MemoryContextSwitchTo(oldcontext);
5251
5252 /* subtract the old change size */
5254 /* now add the change back, with the correct size */
5256 ReorderBufferChangeSize(change));
5257}

References Assert, CompactAttribute::attisdropped, CompactAttribute::attlen, ReorderBufferChange::data, DatumGetPointer(), dlist_container, dlist_foreach, elog, ERROR, fastgetattr(), fb(), free, HASH_FIND, hash_search(), heap_deform_tuple(), heap_form_tuple(), HEAPTUPLESIZE, INDIRECT_POINTER_SIZE, MaxHeapTupleSize, MemoryContextSwitchTo(), TupleDescData::natts, ReorderBufferChange::newtuple, palloc0(), palloc0_array, pfree(), PointerGetDatum(), RelationData::rd_rel, RelationClose(), RelationGetDescr, RelationGetRelationName, RelationIdGetRelation(), RelationIsValid, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SET_VARSIZE(), SET_VARSIZE_COMPRESSED(), SET_VARTAG_EXTERNAL(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, TupleDescCompactAttr(), varatt_external::va_rawsize, varatt_external::va_valueid, VARATT_EXTERNAL_GET_EXTSIZE(), VARATT_EXTERNAL_GET_POINTER, VARATT_EXTERNAL_IS_COMPRESSED(), VARATT_IS_EXTERNAL(), VARATT_IS_SHORT(), VARDATA(), VARDATA_EXTERNAL(), VARHDRSZ, VARSIZE(), and VARTAG_INDIRECT.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastReset()

static void ReorderBufferToastReset ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 5263 of file reorderbuffer.c.

5264{
5267
5268 if (txn->toast_hash == NULL)
5269 return;
5270
5271 /* sequentially walk over the hash and free everything */
5274 {
5276
5277 if (ent->reconstructed != NULL)
5278 pfree(ent->reconstructed);
5279
5280 dlist_foreach_modify(it, &ent->chunks)
5281 {
5282 ReorderBufferChange *change =
5284
5285 dlist_delete(&change->node);
5286 ReorderBufferFreeChange(rb, change, true);
5287 }
5288 }
5289
5291 txn->toast_hash = NULL;
5292}

References dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), hash_seq_init(), hash_seq_search(), ReorderBufferChange::node, pfree(), ReorderBufferFreeChange(), and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferFreeTXN(), ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferTransferSnapToParent()

static void ReorderBufferTransferSnapToParent ( ReorderBufferTXN txn,
ReorderBufferTXN subtxn 
)
static

Definition at line 1166 of file reorderbuffer.c.

1168{
1169 Assert(subtxn->toplevel_xid == txn->xid);
1170
1171 if (subtxn->base_snapshot != NULL)
1172 {
1173 if (txn->base_snapshot == NULL ||
1174 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1175 {
1176 /*
1177 * If the toplevel transaction already has a base snapshot but
1178 * it's newer than the subxact's, purge it.
1179 */
1180 if (txn->base_snapshot != NULL)
1181 {
1184 }
1185
1186 /*
1187 * The snapshot is now the top transaction's; transfer it, and
1188 * adjust the list position of the top transaction in the list by
1189 * moving it to where the subtransaction is.
1190 */
1191 txn->base_snapshot = subtxn->base_snapshot;
1192 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1193 dlist_insert_before(&subtxn->base_snapshot_node,
1194 &txn->base_snapshot_node);
1195
1196 /*
1197 * The subtransaction doesn't have a snapshot anymore (so it
1198 * mustn't be in the list.)
1199 */
1200 subtxn->base_snapshot = NULL;
1201 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1202 dlist_delete(&subtxn->base_snapshot_node);
1203 }
1204 else
1205 {
1206 /* Base snap of toplevel is fine, so subxact's is not needed */
1207 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1208 dlist_delete(&subtxn->base_snapshot_node);
1209 subtxn->base_snapshot = NULL;
1210 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1211 }
1212 }
1213}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_delete(), dlist_insert_before(), fb(), InvalidXLogRecPtr, SnapBuildSnapDecRefcount(), and ReorderBufferTXN::xid.

Referenced by ReorderBufferAssignChild(), and ReorderBufferStreamTXN().

◆ ReorderBufferTruncateTXN()

static void ReorderBufferTruncateTXN ( ReorderBuffer rb,
ReorderBufferTXN txn,
bool  txn_prepared 
)
static

Definition at line 1657 of file reorderbuffer.c.

1658{
1659 dlist_mutable_iter iter;
1660 Size mem_freed = 0;
1661
1662 /* cleanup subtransactions & their changes */
1663 dlist_foreach_modify(iter, &txn->subtxns)
1664 {
1666
1668
1669 /*
1670 * Subtransactions are always associated to the toplevel TXN, even if
1671 * they originally were happening inside another subtxn, so we won't
1672 * ever recurse more than one level deep here.
1673 */
1675 Assert(subtxn->nsubtxns == 0);
1676
1679 }
1680
1681 /* cleanup changes in the txn */
1682 dlist_foreach_modify(iter, &txn->changes)
1683 {
1684 ReorderBufferChange *change;
1685
1686 change = dlist_container(ReorderBufferChange, node, iter.cur);
1687
1688 /* Check we're not mixing changes from different transactions. */
1689 Assert(change->txn == txn);
1690
1691 /* remove the change from its containing list */
1692 dlist_delete(&change->node);
1693
1694 /*
1695 * Instead of updating the memory counter for individual changes, we
1696 * sum up the size of memory to free so we can update the memory
1697 * counter all together below. This saves costs of maintaining the
1698 * max-heap.
1699 */
1701
1702 ReorderBufferFreeChange(rb, change, false);
1703 }
1704
1705 /* Update the memory counter */
1707
1708 if (txn_prepared)
1709 {
1710 /*
1711 * If this is a prepared txn, cleanup the tuplecids we stored for
1712 * decoding catalog snapshot access. They are always stored in the
1713 * toplevel transaction.
1714 */
1715 dlist_foreach_modify(iter, &txn->tuplecids)
1716 {
1717 ReorderBufferChange *change;
1718
1719 change = dlist_container(ReorderBufferChange, node, iter.cur);
1720
1721 /* Check we're not mixing changes from different transactions. */
1722 Assert(change->txn == txn);
1724
1725 /* Remove the change from its containing list. */
1726 dlist_delete(&change->node);
1727
1728 ReorderBufferFreeChange(rb, change, true);
1729 }
1730 }
1731
1732 /*
1733 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1734 * memory. We could also keep the hash table and update it with new ctid
1735 * values, but this seems simpler and good enough for now.
1736 */
1737 if (txn->tuplecid_hash != NULL)
1738 {
1740 txn->tuplecid_hash = NULL;
1741 }
1742
1743 /* If this txn is serialized then clean the disk space. */
1744 if (rbtxn_is_serialized(txn))
1745 {
1748
1749 /*
1750 * We set this flag to indicate if the transaction is ever serialized.
1751 * We need this to accurately update the stats as otherwise the same
1752 * transaction can be counted as serialized multiple times.
1753 */
1755 }
1756
1757 /* also reset the number of entries in the transaction */
1758 txn->nentries_mem = 0;
1759 txn->nentries = 0;
1760}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_is_known_subxact, rbtxn_is_serialized, RBTXN_IS_SERIALIZED_CLEAR, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeChange(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferRestoreCleanup(), ReorderBufferTruncateTXN(), ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecid_hash, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferProcessTXN(), ReorderBufferResetTXN(), ReorderBufferStreamCommit(), and ReorderBufferTruncateTXN().

◆ ReorderBufferTXNByXid()

static ReorderBufferTXN * ReorderBufferTXNByXid ( ReorderBuffer rb,
TransactionId  xid,
bool  create,
bool is_new,
XLogRecPtr  lsn,
bool  create_as_top 
)
static

Definition at line 654 of file reorderbuffer.c.

656{
657 ReorderBufferTXN *txn;
659 bool found;
660
662
663 /*
664 * Check the one-entry lookup cache first
665 */
666 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
667 rb->by_txn_last_xid == xid)
668 {
669 txn = rb->by_txn_last_txn;
670
671 if (txn != NULL)
672 {
673 /* found it, and it's valid */
674 if (is_new)
675 *is_new = false;
676 return txn;
677 }
678
679 /*
680 * cached as non-existent, and asked not to create? Then nothing else
681 * to do.
682 */
683 if (!create)
684 return NULL;
685 /* otherwise fall through to create it */
686 }
687
688 /*
689 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
690 * create an entry.
691 */
692
693 /* search the lookup table */
695 hash_search(rb->by_txn,
696 &xid,
697 create ? HASH_ENTER : HASH_FIND,
698 &found);
699 if (found)
700 txn = ent->txn;
701 else if (create)
702 {
703 /* initialize the new entry, if creation was requested */
704 Assert(ent != NULL);
706
708 ent->txn->xid = xid;
709 txn = ent->txn;
710 txn->first_lsn = lsn;
711 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
712
713 if (create_as_top)
714 {
715 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
717 }
718 }
719 else
720 txn = NULL; /* not found and not asked to create */
721
722 /* update cache */
723 rb->by_txn_last_xid = xid;
724 rb->by_txn_last_txn = txn;
725
726 if (is_new)
727 *is_new = !found;
728
729 Assert(!create || txn != NULL);
730 return txn;
731}

References Assert, AssertTXNLsnOrder(), dlist_push_tail(), fb(), ReorderBufferTXN::first_lsn, HASH_ENTER, HASH_FIND, hash_search(), ReorderBufferTXN::node, ReorderBufferAllocTXN(), ReorderBufferTXN::restart_decoding_lsn, TransactionIdIsValid, and XLogRecPtrIsValid.

Referenced by ReorderBufferAbort(), ReorderBufferAddDistributedInvalidations(), ReorderBufferAddInvalidations(), ReorderBufferAddNewTupleCids(), ReorderBufferAssignChild(), ReorderBufferCommit(), ReorderBufferCommitChild(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferGetInvalidations(), ReorderBufferInvalidate(), ReorderBufferPrepare(), ReorderBufferProcessXid(), ReorderBufferQueueChange(), ReorderBufferQueueMessage(), ReorderBufferRememberPrepareInfo(), ReorderBufferSetBaseSnapshot(), ReorderBufferSkipPrepare(), ReorderBufferXidHasBaseSnapshot(), ReorderBufferXidHasCatalogChanges(), and ReorderBufferXidSetCatalogChanges().

◆ ReorderBufferTXNSizeCompare()

static int ReorderBufferTXNSizeCompare ( const pairingheap_node a,
const pairingheap_node b,
void arg 
)
static

Definition at line 3787 of file reorderbuffer.c.

3788{
3791
3792 if (ta->size < tb->size)
3793 return -1;
3794 if (ta->size > tb->size)
3795 return 1;
3796 return 0;
3797}

References a, b, fb(), and pairingheap_const_container.

Referenced by ReorderBufferAllocate().

◆ ReorderBufferXidHasBaseSnapshot()

bool ReorderBufferXidHasBaseSnapshot ( ReorderBuffer rb,
TransactionId  xid 
)

Definition at line 3740 of file reorderbuffer.c.

/*
 * Report whether the transaction identified by xid has a base snapshot.
 *
 * Both lookups pass create = false, so no transaction entry is ever created
 * by this check.  An xid that is not tracked yields false.  If xid belongs
 * to a known subtransaction, the answer is taken from its top-level
 * transaction (looked up via txn->toplevel_xid) instead.
 *
 * Returns true iff the examined transaction's base_snapshot is non-NULL.
 *
 * NOTE(review): the result of the top-level lookup is dereferenced without
 * a NULL check; presumably a known subxact always has its top-level entry
 * registered -- confirm.
 */
3741{
3742 ReorderBufferTXN *txn;
3743
3744 txn = ReorderBufferTXNByXid(rb, xid, false,
3745 NULL, InvalidXLogRecPtr, false);
3746
3747 /* transaction isn't known yet, ergo no snapshot */
3748 if (txn == NULL)
3749 return false;
3750
3751 /* a known subtxn? operate on top-level txn instead */
3752 if (rbtxn_is_known_subxact(txn))
3753 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3754 NULL, InvalidXLogRecPtr, false);
3755
3756 return txn->base_snapshot != NULL;
3757}

References ReorderBufferTXN::base_snapshot, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), SnapBuildDistributeSnapshotAndInval(), and SnapBuildProcessChange().

◆ ReorderBufferXidHasCatalogChanges()

bool ReorderBufferXidHasCatalogChanges ( ReorderBuffer rb,
TransactionId  xid 
)

Definition at line 3723 of file reorderbuffer.c.

/*
 * Report whether the transaction identified by xid is flagged as having
 * made catalog changes.
 *
 * The lookup passes create = false, so an unknown xid does not get an entry
 * created for it; such an xid simply yields false.  Otherwise the result is
 * whatever rbtxn_has_catalog_changes() reports for the entry that was found
 * (no redirection to a top-level transaction is done here).
 */
3724{
3725 ReorderBufferTXN *txn;
3726
3727 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3728 false);
3729 /* xid not tracked by the reorder buffer: no catalog changes known */
3730 if (txn == NULL)
3731 return false;
3732
3733 return rbtxn_has_catalog_changes(txn);
3734}

References fb(), InvalidXLogRecPtr, rbtxn_has_catalog_changes, and ReorderBufferTXNByXid().

Referenced by SnapBuildXidHasCatalogChanges().

◆ ReorderBufferXidSetCatalogChanges()

void ReorderBufferXidSetCatalogChanges ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3651 of file reorderbuffer.c.

3653{
3654 ReorderBufferTXN *txn;
3655
3656 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3657
3658 if (!rbtxn_has_catalog_changes(txn))
3659 {
3661 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3662 }
3663
3664 /*
3665 * Mark top-level transaction as having catalog changes too if one of its
3666 * children has so that the ReorderBufferBuildTupleCidHash can
3667 * conveniently check just top-level transaction and decide whether to
3668 * build the hash table or not.
3669 */
3670 if (rbtxn_is_subtxn(txn))
3671 {
3672 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3673
3674 if (!rbtxn_has_catalog_changes(toptxn))
3675 {
3677 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3678 }
3679 }
3680}

References ReorderBufferTXN::catchange_node, dclist_push_tail(), fb(), rbtxn_get_toptxn, RBTXN_HAS_CATALOG_CHANGES, rbtxn_has_catalog_changes, rbtxn_is_subtxn, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildProcessNewCid(), and xact_decode().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5555 of file reorderbuffer.c.

5559{
5560 ReorderBufferTupleCidKey key;
5561 ReorderBufferTupleCidEnt *ent;
5562 ForkNumber forkno;
5563 BlockNumber blockno;
5564 bool updated_mapping = false;
5565
5566 /*
5567 * Return unresolved if tuplecid_data is not valid. That's because when
5568 * streaming in-progress transactions we may run into tuples with the CID
5569 * before actually decoding them. Think e.g. about INSERT followed by
5570 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5571 * INSERT. So in such cases, we assume the CID is from the future
5572 * command.
5573 */
5574 if (tuplecid_data == NULL)
5575 return false;
5576
5577 /* be careful about padding */
5578 memset(&key, 0, sizeof(key));
5579
5580 Assert(!BufferIsLocal(buffer));
5581
5582 /*
5583 * get relfilelocator from the buffer, no convenient way to access it
5584 * other than that.
5585 */
5586 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5587
5588 /* tuples can only be in the main fork */
5589 Assert(forkno == MAIN_FORKNUM);
5590 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5591
5592 ItemPointerCopy(&htup->t_self,
5593 &key.tid);
5594
5595 restart:
5596 ent = (ReorderBufferTupleCidEnt *)
5597 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5598
5599 /*
5600 * failed to find a mapping, check whether the table was rewritten and
5601 * apply mapping if so, but only do that once - there can be no new
5602 * mappings while we are in here since we have to hold a lock on the
5603 * relation.
5604 */
5605 if (ent == NULL && !updated_mapping)
5606 {
5607 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5608 /* now check but don't update for a mapping again */
5609 updated_mapping = true;
5610 goto restart;
5611 }
5612 else if (ent == NULL)
5613 return false;
5614
5615 if (cmin)
5616 *cmin = ent->cmin;
5617 if (cmax)
5618 *cmax = ent->cmax;
5619 return true;
5620}

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ SetupCheckXidLive()

static void SetupCheckXidLive ( TransactionId  xid)
inline static

Definition at line 2050 of file reorderbuffer.c.

2051{
2052 /*
2053 * If the input transaction id is already set as a CheckXidAlive then
2054 * nothing to do.
2055 */
2056 if (TransactionIdEquals(CheckXidAlive, xid))
2057 return;
2058
2059 /*
2060 * setup CheckXidAlive if it's not committed yet. We don't check if the
2061 * xid is aborted. That will happen during catalog access.
2062 */
2063 if (!TransactionIdDidCommit(xid))
2064 CheckXidAlive = xid;
2065 else
2066 CheckXidAlive = InvalidTransactionId;
2067}

References CheckXidAlive, InvalidTransactionId, TransactionIdDidCommit(), and TransactionIdEquals.

Referenced by ReorderBufferProcessTXN().

◆ StartupReorderBuffer()

void StartupReorderBuffer ( void  )

Definition at line 4939 of file reorderbuffer.c.

4940{
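4941 DIR *logical_dir;
4942 struct dirent *logical_de;
4943
4944 logical_dir = AllocateDir(PG_REPLSLOT_DIR);
4945 while ((logical_de = ReadDir(logical_dir, PG_REPLSLOT_DIR)) != NULL)
4946 {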
4947 if (strcmp(logical_de->d_name, ".") == 0 ||
4948 strcmp(logical_de->d_name, "..") == 0)
4949 continue;
4950
4951 /* if it cannot be a slot, skip the directory */
4952 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4953 continue;
4954
4955 /*
4956 * ok, has to be a surviving logical slot, iterate and delete
4957 * everything starting with xid-*
4958 */
4959 ReorderBufferCleanupSerializedTXNs(logical_de->d_name);
4960 }
4961 FreeDir(logical_dir);
4962}

References AllocateDir(), DEBUG2, fb(), FreeDir(), PG_REPLSLOT_DIR, ReadDir(), ReorderBufferCleanupSerializedTXNs(), and ReplicationSlotValidateName().

Referenced by StartupXLOG().

◆ TransactionIdInArray()

static bool TransactionIdInArray ( TransactionId  xid,
TransactionId xip,
Size  num 
)
static

Definition at line 5454 of file reorderbuffer.c.

5455{
5456 return bsearch(&xid, xip, num,
5457 sizeof(TransactionId), xidComparator) != NULL;
5458}

References fb(), and xidComparator().

Referenced by UpdateLogicalMappings().

◆ UpdateLogicalMappings()

static void UpdateLogicalMappings ( HTAB tuplecid_data,
Oid  relid,
Snapshot  snapshot 
)
static

Definition at line 5477 of file reorderbuffer.c.

5478{
5479 DIR *mapping_dir;
5480 struct dirent *mapping_de;
5481 List *files = NIL;
5482 ListCell *file;
5483 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
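5484
5485 mapping_dir = AllocateDir(PG_LOGICAL_MAPPINGS_DIR);
5486 while ((mapping_de = ReadDir(mapping_dir, PG_LOGICAL_MAPPINGS_DIR)) != NULL)
5487 {
5488 Oid f_dboid;
5489 Oid f_relid;
5490 TransactionId f_mapped_xid;
5491 TransactionId f_create_xid;
5492 XLogRecPtr f_lsn;
5493 uint32 f_hi,
5494 f_lo;
5495 RewriteMappingFile *f;
5496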
5497 if (strcmp(mapping_de->d_name, ".") == 0 ||
5498 strcmp(mapping_de->d_name, "..") == 0)
5499 continue;
5500
5501 /* Ignore files that aren't ours */
5502 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5503 continue;
5504
5505 if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
5506 &f_dboid, &f_relid, &f_hi, &f_lo,
5507 &f_mapped_xid, &f_create_xid) != 6)
5508 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5509
5510 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5511
5512 /* mapping for another database */
5513 if (f_dboid != dboid)
5514 continue;
5515
5516 /* mapping for another relation */
5517 if (f_relid != relid)
5518 continue;
5519
5520 /* did the creating transaction abort? */
5521 if (!TransactionIdDidCommit(f_create_xid))
5522 continue;
5523
5524 /* not for our transaction */
5525 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5526 continue;
5527
5528 /* ok, relevant, queue for apply */
5529 f = palloc_object(RewriteMappingFile);
5530 f->lsn = f_lsn;
5531 strcpy(f->fname, mapping_de->d_name);
5532 files = lappend(files, f);
5533 }
5534 FreeDir(mapping_dir);
5535
5536 /* sort files so we apply them in LSN order */
5537 list_sort(files, file_sort_by_lsn);
5538
5539 foreach(file, files)
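5540 {
5541 RewriteMappingFile *f = (RewriteMappingFile *) lfirst(file);
5542
5543 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5544 snapshot->subxip[0]);
5545 ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
5546 pfree(f);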
5547 }
5548}

References AllocateDir(), ApplyLogicalMappingFile(), DEBUG1, elog, ERROR, fb(), file_sort_by_lsn(), RewriteMappingFile::fname, FreeDir(), InvalidOid, IsSharedRelation(), lappend(), lfirst, list_sort(), LOGICAL_REWRITE_FORMAT, RewriteMappingFile::lsn, MyDatabaseId, NIL, palloc_object, pfree(), PG_LOGICAL_MAPPINGS_DIR, ReadDir(), SnapshotData::subxcnt, SnapshotData::subxip, TransactionIdDidCommit(), TransactionIdInArray(), and tuplecid_data.

Referenced by ResolveCminCmaxDuringDecoding().

Variable Documentation

◆ debug_logical_replication_streaming

◆ logical_decoding_work_mem

int logical_decoding_work_mem

Definition at line 226 of file reorderbuffer.c.

Referenced by ReorderBufferCheckMemoryLimit().

◆ max_changes_in_memory

const Size max_changes_in_memory = 4096
static

Definition at line 227 of file reorderbuffer.c.

Referenced by ReorderBufferRestoreChanges().