PostgreSQL Source Code git master
reorderbuffer.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <sys/stat.h>
#include "access/detoast.h"
#include "access/heapam.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "common/int.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"

Data Structures

struct  ReorderBufferTXNByIdEnt
 
struct  ReorderBufferTupleCidKey
 
struct  ReorderBufferTupleCidEnt
 
struct  TXNEntryFile
 
struct  ReorderBufferIterTXNEntry
 
struct  ReorderBufferIterTXNState
 
struct  ReorderBufferToastEnt
 
struct  ReorderBufferDiskChange
 
struct  RewriteMappingFile
 

Macros

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
 
#define IsSpecInsert(action)
 
#define IsSpecConfirmOrAbort(action)
 
#define IsInsertOrUpdate(action)
 
#define CHANGES_THRESHOLD   100
 

Typedefs

typedef struct ReorderBufferTXNByIdEnt ReorderBufferTXNByIdEnt
 
typedef struct ReorderBufferTupleCidKey ReorderBufferTupleCidKey
 
typedef struct ReorderBufferTupleCidEnt ReorderBufferTupleCidEnt
 
typedef struct TXNEntryFile TXNEntryFile
 
typedef struct ReorderBufferIterTXNEntry ReorderBufferIterTXNEntry
 
typedef struct ReorderBufferIterTXNState ReorderBufferIterTXNState
 
typedef struct ReorderBufferToastEnt ReorderBufferToastEnt
 
typedef struct ReorderBufferDiskChange ReorderBufferDiskChange
 
typedef struct RewriteMappingFile RewriteMappingFile
 

Functions

static ReorderBufferTXN * ReorderBufferAllocTXN (ReorderBuffer *rb)
 
static void ReorderBufferFreeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static ReorderBufferTXN * ReorderBufferTXNByXid (ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
 
static void ReorderBufferTransferSnapToParent (ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
 
static void AssertTXNLsnOrder (ReorderBuffer *rb)
 
static void ReorderBufferIterTXNInit (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
 
static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferIterTXNFinish (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferExecuteInvalidations (uint32 nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferCheckMemoryLimit (ReorderBuffer *rb)
 
static void ReorderBufferSerializeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferSerializeChange (ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
 
static Size ReorderBufferRestoreChanges (ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
 
static void ReorderBufferRestoreChange (ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
 
static void ReorderBufferRestoreCleanup (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferTruncateTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
 
static void ReorderBufferMaybeMarkTXNStreamed (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static bool ReorderBufferCheckAndTruncateAbortedTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferCleanupSerializedTXNs (const char *slotname)
 
static void ReorderBufferSerializedPath (char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
 
static int ReorderBufferTXNSizeCompare (const pairingheap_node *a, const pairingheap_node *b, void *arg)
 
static void ReorderBufferFreeSnap (ReorderBuffer *rb, Snapshot snap)
 
static Snapshot ReorderBufferCopySnap (ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
 
static bool ReorderBufferCanStream (ReorderBuffer *rb)
 
static bool ReorderBufferCanStartStreaming (ReorderBuffer *rb)
 
static void ReorderBufferStreamTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferStreamCommit (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastInitHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReset (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReplace (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static void ReorderBufferToastAppendChunk (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static Size ReorderBufferChangeSize (ReorderBufferChange *change)
 
static void ReorderBufferChangeMemoryUpdate (ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
 
ReorderBuffer * ReorderBufferAllocate (void)
 
void ReorderBufferFree (ReorderBuffer *rb)
 
ReorderBufferChange * ReorderBufferAllocChange (ReorderBuffer *rb)
 
void ReorderBufferFreeChange (ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
 
HeapTuple ReorderBufferAllocTupleBuf (ReorderBuffer *rb, Size tuple_len)
 
void ReorderBufferFreeTupleBuf (HeapTuple tuple)
 
Oid * ReorderBufferAllocRelids (ReorderBuffer *rb, int nrelids)
 
void ReorderBufferFreeRelids (ReorderBuffer *rb, Oid *relids)
 
static void ReorderBufferProcessPartialChange (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueChange (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueMessage (ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
 
static void AssertChangeLsnOrder (ReorderBufferTXN *txn)
 
ReorderBufferTXN * ReorderBufferGetOldestTXN (ReorderBuffer *rb)
 
TransactionId ReorderBufferGetOldestXmin (ReorderBuffer *rb)
 
void ReorderBufferSetRestartPoint (ReorderBuffer *rb, XLogRecPtr ptr)
 
void ReorderBufferAssignChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
 
void ReorderBufferCommitChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
 
static int ReorderBufferIterCompare (Datum a, Datum b, void *arg)
 
static void ReorderBufferCleanupTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferBuildTupleCidHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void SetupCheckXidLive (TransactionId xid)
 
static void ReorderBufferApplyChange (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyTruncate (ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyMessage (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferSaveTXNSnapshot (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
 
static void ReorderBufferResetTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
 
static void ReorderBufferProcessTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
 
static void ReorderBufferReplay (ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferCommit (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
 
bool ReorderBufferRememberPrepareInfo (ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferSkipPrepare (ReorderBuffer *rb, TransactionId xid)
 
void ReorderBufferPrepare (ReorderBuffer *rb, TransactionId xid, char *gid)
 
void ReorderBufferFinishPrepared (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
 
void ReorderBufferAbort (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
 
void ReorderBufferAbortOld (ReorderBuffer *rb, TransactionId oldestRunningXid)
 
void ReorderBufferForget (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferInvalidate (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferImmediateInvalidation (ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
 
void ReorderBufferProcessXid (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferAddSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferSetBaseSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferAddNewCommandId (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
 
void ReorderBufferAddNewTupleCids (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
 
static void ReorderBufferQueueInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferAccumulateInvalidations (SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
 
void ReorderBufferAddInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferAddDistributedInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferXidSetCatalogChanges (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
TransactionId * ReorderBufferGetCatalogChangesXacts (ReorderBuffer *rb)
 
bool ReorderBufferXidHasCatalogChanges (ReorderBuffer *rb, TransactionId xid)
 
bool ReorderBufferXidHasBaseSnapshot (ReorderBuffer *rb, TransactionId xid)
 
static void ReorderBufferSerializeReserve (ReorderBuffer *rb, Size sz)
 
static ReorderBufferTXN * ReorderBufferLargestTXN (ReorderBuffer *rb)
 
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN (ReorderBuffer *rb)
 
void StartupReorderBuffer (void)
 
static void ApplyLogicalMappingFile (HTAB *tuplecid_data, Oid relid, const char *fname)
 
static bool TransactionIdInArray (TransactionId xid, TransactionId *xip, Size num)
 
static int file_sort_by_lsn (const ListCell *a_p, const ListCell *b_p)
 
static void UpdateLogicalMappings (HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
 
bool ResolveCminCmaxDuringDecoding (HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
uint32 ReorderBufferGetInvalidations (ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
 

Variables

int logical_decoding_work_mem
 
static const Size max_changes_in_memory = 4096
 
int debug_logical_replication_streaming = DEBUG_LOGICAL_REP_STREAMING_BUFFERED
 

Macro Definition Documentation

◆ CHANGES_THRESHOLD

#define CHANGES_THRESHOLD   100

◆ IsInsertOrUpdate

#define IsInsertOrUpdate (   action)
Value:
( \
 (((action) == REORDER_BUFFER_CHANGE_INSERT) || \
  ((action) == REORDER_BUFFER_CHANGE_UPDATE) || \
  ((action) == REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT)) \
)

Definition at line 206 of file reorderbuffer.c.

Function Documentation

◆ ReorderBufferAllocate()

324{
325 ReorderBuffer *buffer;
328
330
331 /* allocate memory in own context, to have better accountability */
333 "ReorderBuffer",
335
336 buffer =
338
339 memset(&hash_ctl, 0, sizeof(hash_ctl));
340
341 buffer->context = new_ctx;
342
344 "Change",
346 sizeof(ReorderBufferChange));
347
349 "TXN",
351 sizeof(ReorderBufferTXN));
352
353 /*
354 * To minimize memory fragmentation caused by long-running transactions
355 * with changes spanning multiple memory blocks, we use a single
356 * fixed-size memory block for decoded tuple storage. The performance
357 * testing showed that the default memory block size maintains logical
358 * decoding performance without causing fragmentation due to concurrent
359 * transactions. One might think that we can use the max size as
360 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
361 * the memory fragmentation.
362 */
364 "Tuples",
368
369 hash_ctl.keysize = sizeof(TransactionId);
370 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
371 hash_ctl.hcxt = buffer->context;
372
373 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
375
377 buffer->by_txn_last_txn = NULL;
378
379 buffer->outbuf = NULL;
380 buffer->outbufsize = 0;
381 buffer->size = 0;
382
383 /* txn_heap is ordered by transaction size */
385
386 buffer->spillTxns = 0;
387 buffer->spillCount = 0;
388 buffer->spillBytes = 0;
389 buffer->streamTxns = 0;
390 buffer->streamCount = 0;
391 buffer->streamBytes = 0;
392 buffer->memExceededCount = 0;
393 buffer->totalTxns = 0;
394 buffer->totalBytes = 0;
395
397
398 dlist_init(&buffer->toplevel_by_lsn);
400 dclist_init(&buffer->catchange_txns);
401
402 /*
403 * Ensure there's no stale data from prior uses of this slot, in case some
404 * prior exit avoided calling ReorderBufferFree. Failure to do this can
405 * produce duplicated txns, and it's very cheap if there's nothing there.
406 */
408
409 return buffer;
410}
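
/*
 * Illustrative sketch (not from reorderbuffer.c): the slab-context pattern
 * used above for the fixed-size ReorderBufferChange and ReorderBufferTXN
 * allocations.  SlabContextCreate(), SLAB_DEFAULT_BLOCK_SIZE,
 * MemoryContextAlloc(), pfree() and MemoryContextDelete() are existing
 * PostgreSQL APIs; DemoChange and demo_slab_usage() are hypothetical.
 */
typedef struct DemoChange
{
	int			payload;
} DemoChange;

static void
demo_slab_usage(MemoryContext parent)
{
	MemoryContext slab;
	DemoChange *change;

	/* every chunk in a slab context has the same, fixed size */
	slab = SlabContextCreate(parent, "DemoChange",
							 SLAB_DEFAULT_BLOCK_SIZE, sizeof(DemoChange));

	change = (DemoChange *) MemoryContextAlloc(slab, sizeof(DemoChange));
	change->payload = 0;

	pfree(change);				/* chunks can be freed individually ... */
	MemoryContextDelete(slab);	/* ... or the whole context dropped at once */
}
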
411
412/*
413 * Free a ReorderBuffer
414 */
415void
417{
418 MemoryContext context = rb->context;
419
420 /*
421 * We free separately allocated data by entirely scrapping reorderbuffer's
422 * memory context.
423 */
424 MemoryContextDelete(context);
425
426 /* Free disk space used by unconsumed reorder buffers */
428}
429
430/*
431 * Allocate a new ReorderBufferTXN.
432 */
433static ReorderBufferTXN *
435{
436 ReorderBufferTXN *txn;
437
438 txn = (ReorderBufferTXN *)
439 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
440
441 memset(txn, 0, sizeof(ReorderBufferTXN));
442
443 dlist_init(&txn->changes);
444 dlist_init(&txn->tuplecids);
445 dlist_init(&txn->subtxns);
446
447 /* InvalidCommandId is not zero, so set it explicitly */
450
451 return txn;
452}
453
454/*
455 * Free a ReorderBufferTXN.
456 */
457static void
459{
460 /* clean the lookup cache if we were cached (quite likely) */
461 if (rb->by_txn_last_xid == txn->xid)
462 {
463 rb->by_txn_last_xid = InvalidTransactionId;
464 rb->by_txn_last_txn = NULL;
465 }
466
467 /* free data that's contained */
468
469 if (txn->gid != NULL)
470 {
471 pfree(txn->gid);
472 txn->gid = NULL;
473 }
474
475 if (txn->tuplecid_hash != NULL)
476 {
478 txn->tuplecid_hash = NULL;
479 }
480
481 if (txn->invalidations)
482 {
483 pfree(txn->invalidations);
484 txn->invalidations = NULL;
485 }
486
488 {
491 }
492
493 /* Reset the toast hash */
495
496 /* All changes must be deallocated */
497 Assert(txn->size == 0);
498
499 pfree(txn);
500}
501
502/*
503 * Allocate a ReorderBufferChange.
504 */
507{
508 ReorderBufferChange *change;
509
510 change = (ReorderBufferChange *)
511 MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange));
512
513 memset(change, 0, sizeof(ReorderBufferChange));
514 return change;
515}
516
517/*
518 * Free a ReorderBufferChange and update memory accounting, if requested.
519 */
520void
522 bool upd_mem)
523{
524 /* update memory accounting info */
525 if (upd_mem)
528
529 /* free contained data */
530 switch (change->action)
531 {
536 if (change->data.tp.newtuple)
537 {
539 change->data.tp.newtuple = NULL;
540 }
541
542 if (change->data.tp.oldtuple)
543 {
545 change->data.tp.oldtuple = NULL;
546 }
547 break;
549 if (change->data.msg.prefix != NULL)
550 pfree(change->data.msg.prefix);
551 change->data.msg.prefix = NULL;
552 if (change->data.msg.message != NULL)
553 pfree(change->data.msg.message);
554 change->data.msg.message = NULL;
555 break;
557 if (change->data.inval.invalidations)
558 pfree(change->data.inval.invalidations);
559 change->data.inval.invalidations = NULL;
560 break;
562 if (change->data.snapshot)
563 {
565 change->data.snapshot = NULL;
566 }
567 break;
568 /* no data in addition to the struct itself */
570 if (change->data.truncate.relids != NULL)
571 {
573 change->data.truncate.relids = NULL;
574 }
575 break;
580 break;
581 }
582
583 pfree(change);
584}
585
586/*
587 * Allocate a HeapTuple fitting a tuple of size tuple_len (excluding header
588 * overhead).
589 */
592{
593 HeapTuple tuple;
595
596 alloc_len = tuple_len + SizeofHeapTupleHeader;
597
598 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
600 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
601
602 return tuple;
603}
604
605/*
606 * Free a HeapTuple returned by ReorderBufferAllocTupleBuf().
607 */
608void
610{
611 pfree(tuple);
612}
613
614/*
615 * Allocate an array for relids of truncated relations.
616 *
617 * We use the global memory context (for the whole reorder buffer), because
618 * none of the existing ones seems like a good match (some are SLAB, so we
619 * can't use those, and tup_context is meant for tuple data, not relids). We
620 * could add yet another context, but it seems like an overkill - TRUNCATE is
621 * not particularly common operation, so it does not seem worth it.
622 */
623Oid *
625{
626 Oid *relids;
628
629 alloc_len = sizeof(Oid) * nrelids;
630
631 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
632
633 return relids;
634}
635
636/*
637 * Free an array of relids.
638 */
639void
641{
642 pfree(relids);
643}
644
645/*
646 * Return the ReorderBufferTXN from the given buffer, specified by Xid.
647 * If create is true, and a transaction doesn't already exist, create it
648 * (with the given LSN, and as top transaction if that's specified);
649 * when this happens, is_new is set to true.
650 */
651static ReorderBufferTXN *
653 bool *is_new, XLogRecPtr lsn, bool create_as_top)
654{
655 ReorderBufferTXN *txn;
657 bool found;
658
660
661 /*
662 * Check the one-entry lookup cache first
663 */
664 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
665 rb->by_txn_last_xid == xid)
666 {
667 txn = rb->by_txn_last_txn;
668
669 if (txn != NULL)
670 {
671 /* found it, and it's valid */
672 if (is_new)
673 *is_new = false;
674 return txn;
675 }
676
677 /*
678 * cached as non-existent, and asked not to create? Then nothing else
679 * to do.
680 */
681 if (!create)
682 return NULL;
683 /* otherwise fall through to create it */
684 }
685
686 /*
687 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
688 * create an entry.
689 */
690
691 /* search the lookup table */
693 hash_search(rb->by_txn,
694 &xid,
695 create ? HASH_ENTER : HASH_FIND,
696 &found);
697 if (found)
698 txn = ent->txn;
699 else if (create)
700 {
701 /* initialize the new entry, if creation was requested */
702 Assert(ent != NULL);
704
706 ent->txn->xid = xid;
707 txn = ent->txn;
708 txn->first_lsn = lsn;
709 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
710
711 if (create_as_top)
712 {
713 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
715 }
716 }
717 else
718 txn = NULL; /* not found and not asked to create */
719
720 /* update cache */
721 rb->by_txn_last_xid = xid;
722 rb->by_txn_last_txn = txn;
723
724 if (is_new)
725 *is_new = !found;
726
727 Assert(!create || txn != NULL);
728 return txn;
729}
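
/*
 * Illustrative sketch (not from reorderbuffer.c): the dynahash lookup
 * pattern ReorderBufferTXNByXid() builds on.  hash_search() either finds an
 * existing entry or, with HASH_ENTER, creates one and reports via *found
 * whether it was pre-existing.  hash_search(), HASH_ENTER and HASH_FIND are
 * existing PostgreSQL APIs; DemoEnt and demo_lookup_xid() are hypothetical.
 */
typedef struct DemoEnt
{
	TransactionId xid;			/* hash key; must be the first field */
	void	   *payload;
} DemoEnt;

static DemoEnt *
demo_lookup_xid(HTAB *tab, TransactionId xid, bool create)
{
	DemoEnt    *ent;
	bool		found;

	ent = (DemoEnt *) hash_search(tab, &xid,
								  create ? HASH_ENTER : HASH_FIND,
								  &found);

	if (ent != NULL && !found)
		ent->payload = NULL;	/* newly created entry: initialize it */

	return ent;
}
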
730
731/*
732 * Record the partial change for the streaming of in-progress transactions. We
733 * can stream only complete changes so if we have a partial change like toast
734 * table insert or speculative insert then we mark such a 'txn' so that it
735 * can't be streamed. We also ensure that if the changes in such a 'txn' can
736 * be streamed and are above logical_decoding_work_mem threshold then we stream
737 * them as soon as we have a complete change.
738 */
739static void
741 ReorderBufferChange *change,
742 bool toast_insert)
743{
744 ReorderBufferTXN *toptxn;
745
746 /*
747 * The partial changes need to be processed only while streaming
748 * in-progress transactions.
749 */
751 return;
752
753 /* Get the top transaction. */
754 toptxn = rbtxn_get_toptxn(txn);
755
756 /*
757 * Indicate a partial change for toast inserts. The change will be
758 * considered as complete once we get the insert or update on the main
759 * table and we are sure that the pending toast chunks are not required
760 * anymore.
761 *
762 * If we allow streaming when there are pending toast chunks then such
763 * chunks won't be released till the insert (multi_insert) is complete and
764 * we expect the txn to have streamed all changes after streaming. This
765 * restriction is mainly to ensure the correctness of streamed
766 * transactions and it doesn't seem worth uplifting such a restriction
767 * just to allow this case because anyway we will stream the transaction
768 * once such an insert is complete.
769 */
770 if (toast_insert)
772 else if (rbtxn_has_partial_change(toptxn) &&
773 IsInsertOrUpdate(change->action) &&
776
777 /*
778 * Indicate a partial change for speculative inserts. The change will be
779 * considered as complete once we get the speculative confirm or abort
780 * token.
781 */
782 if (IsSpecInsert(change->action))
784 else if (rbtxn_has_partial_change(toptxn) &&
787
788 /*
789 * Stream the transaction if it is serialized before and the changes are
790 * now complete in the top-level transaction.
791 *
792 * The reason for doing the streaming of such a transaction as soon as we
793 * get the complete change for it is that previously it would have reached
794 * the memory threshold and wouldn't get streamed because of incomplete
795 * changes. Delaying such transactions would increase apply lag for them.
796 */
798 !(rbtxn_has_partial_change(toptxn)) &&
799 rbtxn_is_serialized(txn) &&
801 ReorderBufferStreamTXN(rb, toptxn);
802}
803
804/*
805 * Queue a change into a transaction so it can be replayed upon commit or will be
806 * streamed when we reach logical_decoding_work_mem threshold.
807 */
808void
810 ReorderBufferChange *change, bool toast_insert)
811{
812 ReorderBufferTXN *txn;
813
814 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
815
816 /*
817 * If we have detected that the transaction is aborted while streaming the
818 * previous changes or by checking its CLOG, there is no point in
819 * collecting further changes for it.
820 */
821 if (rbtxn_is_aborted(txn))
822 {
823 /*
824 * We don't need to update memory accounting for this change as we
825 * have not added it to the queue yet.
826 */
827 ReorderBufferFreeChange(rb, change, false);
828 return;
829 }
830
831 /*
832 * The changes that are sent downstream are considered streamable. We
833 * remember such transactions so that only those will later be considered
834 * for streaming.
835 */
836 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
842 {
843 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
844
846 }
847
848 change->lsn = lsn;
849 change->txn = txn;
850
852 dlist_push_tail(&txn->changes, &change->node);
853 txn->nentries++;
854 txn->nentries_mem++;
855
856 /* update memory accounting information */
859
860 /* process partial change */
862
863 /* check the memory limits and evict something if needed */
865}
866
867/*
868 * A transactional message is queued to be processed upon commit and a
869 * non-transactional message gets processed immediately.
870 */
871void
874 bool transactional, const char *prefix,
875 Size message_size, const char *message)
876{
877 if (transactional)
878 {
879 MemoryContext oldcontext;
880 ReorderBufferChange *change;
881
883
884 /*
885 * We don't expect snapshots for transactional changes - we'll use the
886 * snapshot derived later during apply (unless the change gets
887 * skipped).
888 */
889 Assert(!snap);
890
891 oldcontext = MemoryContextSwitchTo(rb->context);
892
895 change->data.msg.prefix = pstrdup(prefix);
896 change->data.msg.message_size = message_size;
897 change->data.msg.message = palloc(message_size);
898 memcpy(change->data.msg.message, message, message_size);
899
900 ReorderBufferQueueChange(rb, xid, lsn, change, false);
901
902 MemoryContextSwitchTo(oldcontext);
903 }
904 else
905 {
906 ReorderBufferTXN *txn = NULL;
907 volatile Snapshot snapshot_now = snap;
908
909 /* Non-transactional changes require a valid snapshot. */
910 Assert(snapshot_now);
911
912 if (xid != InvalidTransactionId)
913 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
914
915 /* setup snapshot to allow catalog access */
916 SetupHistoricSnapshot(snapshot_now, NULL);
917 PG_TRY();
918 {
919 rb->message(rb, txn, lsn, false, prefix, message_size, message);
920
922 }
923 PG_CATCH();
924 {
926 PG_RE_THROW();
927 }
928 PG_END_TRY();
929 }
930}
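
/*
 * Illustrative sketch (not from reorderbuffer.c): the PG_TRY/PG_CATCH
 * discipline used above for non-transactional messages, which guarantees the
 * historic snapshot is torn down even if the output plugin callback errors
 * out.  SetupHistoricSnapshot(), TeardownHistoricSnapshot() and the PG_TRY
 * machinery are existing PostgreSQL APIs; demo_callback is hypothetical.
 */
static void
demo_with_historic_snapshot(Snapshot snap, void (*demo_callback) (void))
{
	SetupHistoricSnapshot(snap, NULL);

	PG_TRY();
	{
		demo_callback();		/* may elog(ERROR, ...) */

		TeardownHistoricSnapshot(false);
	}
	PG_CATCH();
	{
		TeardownHistoricSnapshot(true); /* error path */
		PG_RE_THROW();
	}
	PG_END_TRY();
}
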
931
932/*
933 * AssertTXNLsnOrder
934 * Verify LSN ordering of transaction lists in the reorderbuffer
935 *
936 * Other LSN-related invariants are checked too.
937 *
938 * No-op if assertions are not in use.
939 */
940static void
942{
943#ifdef USE_ASSERT_CHECKING
944 LogicalDecodingContext *ctx = rb->private_data;
945 dlist_iter iter;
948
949 /*
950 * Skip the verification if we don't reach the LSN at which we start
951 * decoding the contents of transactions yet because until we reach the
952 * LSN, we could have transactions that don't have the association between
953 * the top-level transaction and subtransaction yet and consequently have
954 * the same LSN. We don't guarantee this association until we try to
955 * decode the actual contents of transaction. The ordering of the records
956 * prior to the start_decoding_at LSN should have been checked before the
957 * restart.
958 */
960 return;
961
962 dlist_foreach(iter, &rb->toplevel_by_lsn)
963 {
965 iter.cur);
966
967 /* start LSN must be set */
968 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
969
970 /* If there is an end LSN, it must be higher than start LSN */
971 if (XLogRecPtrIsValid(cur_txn->end_lsn))
972 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
973
974 /* Current initial LSN must be strictly higher than previous */
977
978 /* known-as-subtxn txns must not be listed */
980
981 prev_first_lsn = cur_txn->first_lsn;
982 }
983
984 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
985 {
987 base_snapshot_node,
988 iter.cur);
989
990 /* base snapshot (and its LSN) must be set */
991 Assert(cur_txn->base_snapshot != NULL);
992 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
993
994 /* current LSN must be strictly higher than previous */
996 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
997
998 /* known-as-subtxn txns must not be listed */
1000
1001 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1002 }
1003#endif
1004}
1005
1006/*
1007 * AssertChangeLsnOrder
1008 *
1009 * Check ordering of changes in the (sub)transaction.
1010 */
1011static void
1013{
1014#ifdef USE_ASSERT_CHECKING
1015 dlist_iter iter;
1017
1018 dlist_foreach(iter, &txn->changes)
1019 {
1021
1023
1026 Assert(txn->first_lsn <= cur_change->lsn);
1027
1028 if (XLogRecPtrIsValid(txn->end_lsn))
1029 Assert(cur_change->lsn <= txn->end_lsn);
1030
1032
1033 prev_lsn = cur_change->lsn;
1034 }
1035#endif
1036}
1037
1038/*
1039 * ReorderBufferGetOldestTXN
1040 * Return oldest transaction in reorderbuffer
1041 */
1044{
1045 ReorderBufferTXN *txn;
1046
1048
1049 if (dlist_is_empty(&rb->toplevel_by_lsn))
1050 return NULL;
1051
1052 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1053
1056 return txn;
1057}
1058
1059/*
1060 * ReorderBufferGetOldestXmin
1061 * Return oldest Xmin in reorderbuffer
1062 *
1063 * Returns oldest possibly running Xid from the point of view of snapshots
1064 * used in the transactions kept by reorderbuffer, or InvalidTransactionId if
1065 * there are none.
1066 *
1067 * Since snapshots are assigned monotonically, this equals the Xmin of the
1068 * base snapshot with minimal base_snapshot_lsn.
1069 */
1072{
1073 ReorderBufferTXN *txn;
1074
1076
1077 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1078 return InvalidTransactionId;
1079
1080 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1081 &rb->txns_by_base_snapshot_lsn);
1082 return txn->base_snapshot->xmin;
1083}
1084
1085void
1087{
1088 rb->current_restart_decoding_lsn = ptr;
1089}
1090
1091/*
1092 * ReorderBufferAssignChild
1093 *
1094 * Make note that we know that subxid is a subtransaction of xid, seen as of
1095 * the given lsn.
1096 */
1097void
1099 TransactionId subxid, XLogRecPtr lsn)
1100{
1101 ReorderBufferTXN *txn;
1103 bool new_top;
1104 bool new_sub;
1105
1106 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1107 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1108
1109 if (!new_sub)
1110 {
1112 {
1113 /* already associated, nothing to do */
1114 return;
1115 }
1116 else
1117 {
1118 /*
1119 * We already saw this transaction, but initially added it to the
1120 * list of top-level txns. Now that we know it's not top-level,
1121 * remove it from there.
1122 */
1123 dlist_delete(&subtxn->node);
1124 }
1125 }
1126
1127 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1128 subtxn->toplevel_xid = xid;
1129 Assert(subtxn->nsubtxns == 0);
1130
1131 /* set the reference to top-level transaction */
1132 subtxn->toptxn = txn;
1133
1134 /* add to subtransaction list */
1135 dlist_push_tail(&txn->subtxns, &subtxn->node);
1136 txn->nsubtxns++;
1137
1138 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1140
1141 /* Verify LSN-ordering invariant */
1143}
1144
1145/*
1146 * ReorderBufferTransferSnapToParent
1147 * Transfer base snapshot from subtxn to top-level txn, if needed
1148 *
1149 * This is done if the top-level txn doesn't have a base snapshot, or if the
1150 * subtxn's base snapshot has an earlier LSN than the top-level txn's base
1151 * snapshot's LSN. This can happen if there are no changes in the toplevel
1152 * txn but there are some in the subtxn, or the first change in subtxn has
1153 * earlier LSN than first change in the top-level txn and we learned about
1154 * their kinship only now.
1155 *
1156 * The subtransaction's snapshot is cleared regardless of the transfer
1157 * happening, since it's not needed anymore in either case.
1158 *
1159 * We do this as soon as we become aware of their kinship, to avoid queueing
1160 * extra snapshots to txns known-as-subtxns -- only top-level txns will
1161 * receive further snapshots.
1162 */
1163static void
1166{
1167 Assert(subtxn->toplevel_xid == txn->xid);
1168
1169 if (subtxn->base_snapshot != NULL)
1170 {
1171 if (txn->base_snapshot == NULL ||
1172 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1173 {
1174 /*
1175 * If the toplevel transaction already has a base snapshot but
1176 * it's newer than the subxact's, purge it.
1177 */
1178 if (txn->base_snapshot != NULL)
1179 {
1182 }
1183
1184 /*
1185 * The snapshot is now the top transaction's; transfer it, and
1186 * adjust the list position of the top transaction in the list by
1187 * moving it to where the subtransaction is.
1188 */
1189 txn->base_snapshot = subtxn->base_snapshot;
1190 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1191 dlist_insert_before(&subtxn->base_snapshot_node,
1192 &txn->base_snapshot_node);
1193
1194 /*
1195 * The subtransaction doesn't have a snapshot anymore (so it
1196 * mustn't be in the list.)
1197 */
1198 subtxn->base_snapshot = NULL;
1199 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1200 dlist_delete(&subtxn->base_snapshot_node);
1201 }
1202 else
1203 {
1204 /* Base snap of toplevel is fine, so subxact's is not needed */
1205 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1206 dlist_delete(&subtxn->base_snapshot_node);
1207 subtxn->base_snapshot = NULL;
1208 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1209 }
1210 }
1211}
1212
1213/*
1214 * Associate a subtransaction with its toplevel transaction at commit
1215 * time. There may be no further changes added after this.
1216 */
1217void
1219 TransactionId subxid, XLogRecPtr commit_lsn,
1220 XLogRecPtr end_lsn)
1221{
1223
1224 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1225 InvalidXLogRecPtr, false);
1226
1227 /*
1228 * No need to do anything if that subtxn didn't contain any changes
1229 */
1230 if (!subtxn)
1231 return;
1232
1233 subtxn->final_lsn = commit_lsn;
1234 subtxn->end_lsn = end_lsn;
1235
1236 /*
1237 * Assign this subxact as a child of the toplevel xact (no-op if already
1238 * done.)
1239 */
1241}
1242
1243
1244/*
1245 * Support for efficiently iterating over a transaction's and its
1246 * subtransactions' changes.
1247 *
1248 * We do this by doing a k-way merge between transactions/subtransactions. For that
1249 * we model the current heads of the different transactions as a binary heap
1250 * so we easily know which (sub-)transaction has the change with the smallest
1251 * lsn next.
1252 *
1253 * We assume the changes in individual transactions are already sorted by LSN.
1254 */
1255
1256/*
1257 * Binary heap comparison function.
1258 */
1259static int
1261{
1263 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1264 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1265
1266 if (pos_a < pos_b)
1267 return 1;
1268 else if (pos_a == pos_b)
1269 return 0;
1270 return -1;
1271}
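
/*
 * Illustrative sketch (not from reorderbuffer.c): lib/binaryheap.h provides
 * a max-heap, so a comparator that returns 1 when its first argument sorts
 * *earlier* (as ReorderBufferIterCompare() does above) turns it into a
 * min-heap -- exactly what the k-way merge over per-transaction change lists
 * needs.  The binaryheap_* calls are existing PostgreSQL APIs; demo_cmp(),
 * demo_merge() and the lsns array are hypothetical.
 */
static int
demo_cmp(Datum a, Datum b, void *arg)
{
	XLogRecPtr *lsns = (XLogRecPtr *) arg;
	XLogRecPtr	pos_a = lsns[DatumGetInt32(a)];
	XLogRecPtr	pos_b = lsns[DatumGetInt32(b)];

	if (pos_a < pos_b)
		return 1;				/* smaller LSN rises to the top */
	else if (pos_a == pos_b)
		return 0;
	return -1;
}

static void
demo_merge(XLogRecPtr *lsns, int nstreams)
{
	binaryheap *heap = binaryheap_allocate(nstreams, demo_cmp, lsns);

	for (int i = 0; i < nstreams; i++)
		binaryheap_add_unordered(heap, Int32GetDatum(i));
	binaryheap_build(heap);

	while (!binaryheap_empty(heap))
	{
		int			off = DatumGetInt32(binaryheap_first(heap));

		/* consume the head of stream 'off' here, then drop it */
		(void) off;
		binaryheap_remove_first(heap);
	}

	binaryheap_free(heap);
}
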
1272
1273/*
1274 * Allocate & initialize an iterator which iterates in lsn order over a
1275 * transaction and all its subtransactions.
1276 *
1277 * Note: The iterator state is returned through iter_state parameter rather
1278 * than the function's return value. This is because the state gets cleaned up
1279 * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1280 * back the state even if this function throws an exception.
1281 */
1282static void
1285{
1286 Size nr_txns = 0;
1289 int32 off;
1290
1291 *iter_state = NULL;
1292
1293 /* Check ordering of changes in the toplevel transaction. */
1295
1296 /*
1297 * Calculate the size of our heap: one element for every transaction that
1298 * contains changes. (Besides the transactions already in the reorder
1299 * buffer, we count the one we were directly passed.)
1300 */
1301 if (txn->nentries > 0)
1302 nr_txns++;
1303
1305 {
1307
1309
1310 /* Check ordering of changes in this subtransaction. */
1312
1313 if (cur_txn->nentries > 0)
1314 nr_txns++;
1315 }
1316
1317 /* allocate iteration state */
1319 MemoryContextAllocZero(rb->context,
1321 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1322
1323 state->nr_txns = nr_txns;
1324 dlist_init(&state->old_change);
1325
1326 for (off = 0; off < state->nr_txns; off++)
1327 {
1328 state->entries[off].file.vfd = -1;
1329 state->entries[off].segno = 0;
1330 }
1331
1332 /* allocate heap */
1333 state->heap = binaryheap_allocate(state->nr_txns,
1335 state);
1336
1337 /* Now that the state fields are initialized, it is safe to return it. */
1338 *iter_state = state;
1339
1340 /*
1341 * Now insert items into the binary heap, in an unordered fashion. (We
1342 * will run a heap assembly step at the end; this is more efficient.)
1343 */
1344
1345 off = 0;
1346
1347 /* add toplevel transaction if it contains changes */
1348 if (txn->nentries > 0)
1349 {
1351
1352 if (rbtxn_is_serialized(txn))
1353 {
1354 /* serialize remaining changes */
1356 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1357 &state->entries[off].segno);
1358 }
1359
1361 &txn->changes);
1362
1363 state->entries[off].lsn = cur_change->lsn;
1364 state->entries[off].change = cur_change;
1365 state->entries[off].txn = txn;
1366
1368 }
1369
1370 /* add subtransactions if they contain changes */
1372 {
1374
1376
1377 if (cur_txn->nentries > 0)
1378 {
1380
1382 {
1383 /* serialize remaining changes */
1386 &state->entries[off].file,
1387 &state->entries[off].segno);
1388 }
1390 &cur_txn->changes);
1391
1392 state->entries[off].lsn = cur_change->lsn;
1393 state->entries[off].change = cur_change;
1394 state->entries[off].txn = cur_txn;
1395
1397 }
1398 }
1399
1400 /* assemble a valid binary heap */
1401 binaryheap_build(state->heap);
1402}
1403
1404/*
1405 * Return the next change when iterating over a transaction and its
1406 * subtransactions.
1407 *
1408 * Returns NULL when no further changes exist.
1409 */
1410static ReorderBufferChange *
1412{
1413 ReorderBufferChange *change;
1415 int32 off;
1416
1417 /* nothing there anymore */
1418 if (binaryheap_empty(state->heap))
1419 return NULL;
1420
1421 off = DatumGetInt32(binaryheap_first(state->heap));
1422 entry = &state->entries[off];
1423
1424 /* free memory we might have "leaked" in the previous *Next call */
1425 if (!dlist_is_empty(&state->old_change))
1426 {
1427 change = dlist_container(ReorderBufferChange, node,
1428 dlist_pop_head_node(&state->old_change));
1429 ReorderBufferFreeChange(rb, change, true);
1430 Assert(dlist_is_empty(&state->old_change));
1431 }
1432
1433 change = entry->change;
1434
1435 /*
1436 * update heap with information about which transaction has the next
1437 * relevant change in LSN order
1438 */
1439
1440 /* there are in-memory changes */
1441 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1442 {
1443 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1446
1447 /* txn stays the same */
1448 state->entries[off].lsn = next_change->lsn;
1449 state->entries[off].change = next_change;
1450
1452 return change;
1453 }
1454
1455 /* try to load changes from disk */
1456 if (entry->txn->nentries != entry->txn->nentries_mem)
1457 {
1458 /*
1459 * Ugly: restoring changes will reuse *Change records, thus delete the
1460 * current one from the per-tx list and only free in the next call.
1461 */
1462 dlist_delete(&change->node);
1463 dlist_push_tail(&state->old_change, &change->node);
1464
1465 /*
1466 * Update the total bytes processed by the txn for which we are
1467 * releasing the current set of changes and restoring the new set of
1468 * changes.
1469 */
1470 rb->totalBytes += entry->txn->size;
1471 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1472 &state->entries[off].segno))
1473 {
1474 /* successfully restored changes from disk */
1477 &entry->txn->changes);
1478
1479 elog(DEBUG2, "restored %u/%u changes from disk",
1480 (uint32) entry->txn->nentries_mem,
1481 (uint32) entry->txn->nentries);
1482
1483 Assert(entry->txn->nentries_mem);
1484 /* txn stays the same */
1485 state->entries[off].lsn = next_change->lsn;
1486 state->entries[off].change = next_change;
1488
1489 return change;
1490 }
1491 }
1492
1493 /* ok, no changes there anymore, remove */
1495
1496 return change;
1497}
1498
1499/*
1500 * Deallocate the iterator
1501 */
1502static void
1505{
1506 int32 off;
1507
1508 for (off = 0; off < state->nr_txns; off++)
1509 {
1510 if (state->entries[off].file.vfd != -1)
1511 FileClose(state->entries[off].file.vfd);
1512 }
1513
1514 /* free memory we might have "leaked" in the last *Next call */
1515 if (!dlist_is_empty(&state->old_change))
1516 {
1517 ReorderBufferChange *change;
1518
1519 change = dlist_container(ReorderBufferChange, node,
1520 dlist_pop_head_node(&state->old_change));
1521 ReorderBufferFreeChange(rb, change, true);
1522 Assert(dlist_is_empty(&state->old_change));
1523 }
1524
1525 binaryheap_free(state->heap);
1526 pfree(state);
1527}
1528
1529/*
1530 * Cleanup the contents of a transaction, usually after the transaction
1531 * committed or aborted.
1532 */
1533static void
1535{
1536 bool found;
1537 dlist_mutable_iter iter;
1538 Size mem_freed = 0;
1539
1540 /* cleanup subtransactions & their changes */
1541 dlist_foreach_modify(iter, &txn->subtxns)
1542 {
1544
1546
1547 /*
1548 * Subtransactions are always associated to the toplevel TXN, even if
1549 * they originally were happening inside another subtxn, so we won't
1550 * ever recurse more than one level deep here.
1551 */
1553 Assert(subtxn->nsubtxns == 0);
1554
1556 }
1557
1558 /* cleanup changes in the txn */
1559 dlist_foreach_modify(iter, &txn->changes)
1560 {
1561 ReorderBufferChange *change;
1562
1563 change = dlist_container(ReorderBufferChange, node, iter.cur);
1564
1565 /* Check we're not mixing changes from different transactions. */
1566 Assert(change->txn == txn);
1567
1568 /*
1569 * Instead of updating the memory counter for individual changes, we
1570 * sum up the size of memory to free so we can update the memory
1571 * counter all together below. This saves costs of maintaining the
1572 * max-heap.
1573 */
1575
1576 ReorderBufferFreeChange(rb, change, false);
1577 }
1578
1579 /* Update the memory counter */
1581
1582 /*
1583 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1584 * They are always stored in the toplevel transaction.
1585 */
1586 dlist_foreach_modify(iter, &txn->tuplecids)
1587 {
1588 ReorderBufferChange *change;
1589
1590 change = dlist_container(ReorderBufferChange, node, iter.cur);
1591
1592 /* Check we're not mixing changes from different transactions. */
1593 Assert(change->txn == txn);
1595
1596 ReorderBufferFreeChange(rb, change, true);
1597 }
1598
1599 /*
1600 * Cleanup the base snapshot, if set.
1601 */
1602 if (txn->base_snapshot != NULL)
1603 {
1606 }
1607
1608 /*
1609 * Cleanup the snapshot for the last streamed run.
1610 */
1611 if (txn->snapshot_now != NULL)
1612 {
1615 }
1616
1617 /*
1618 * Remove TXN from its containing lists.
1619 *
1620 * Note: if txn is known as subxact, we are deleting the TXN from its
1621 * parent's list of known subxacts; this leaves the parent's nsubxacts
1622 * count too high, but we don't care. Otherwise, we are deleting the TXN
1623 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1624 * list of catalog modifying transactions as well.
1625 */
1626 dlist_delete(&txn->node);
1628 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1629
1630 /* now remove reference from buffer */
1631 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1632 Assert(found);
1633
1634 /* remove entries spilled to disk */
1635 if (rbtxn_is_serialized(txn))
1637
1638 /* deallocate */
1640}
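
/*
 * Illustrative sketch (not from reorderbuffer.c): dlist_foreach_modify() is
 * the safe way to walk an intrusive dlist while deleting the element the
 * iterator currently points at, which is the pattern used throughout the
 * cleanup code above.  The dlist_* calls come from lib/ilist.h; DemoNode and
 * demo_free_all() are hypothetical.
 */
typedef struct DemoNode
{
	dlist_node	node;
	int			value;
} DemoNode;

static void
demo_free_all(dlist_head *head)
{
	dlist_mutable_iter iter;

	dlist_foreach_modify(iter, head)
	{
		DemoNode   *n = dlist_container(DemoNode, node, iter.cur);

		dlist_delete(&n->node);
		pfree(n);
	}
}
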
1641
1642/*
1643 * Discard changes from a transaction (and subtransactions), either after
1644 * streaming, decoding them at PREPARE, or detecting the transaction abort.
1645 * Keep the remaining info - transactions, tuplecids, invalidations and
1646 * snapshots.
1647 *
1648 * We additionally remove tuplecids after decoding the transaction at prepare
1649 * time as we only need to perform invalidation at rollback or commit prepared.
1650 *
1651 * 'txn_prepared' indicates that we have decoded the transaction at prepare
1652 * time.
1653 */
1654static void
1656{
1657 dlist_mutable_iter iter;
1658 Size mem_freed = 0;
1659
1660 /* cleanup subtransactions & their changes */
1661 dlist_foreach_modify(iter, &txn->subtxns)
1662 {
1664
1666
1667 /*
1668 * Subtransactions are always associated to the toplevel TXN, even if
1669 * they originally were happening inside another subtxn, so we won't
1670 * ever recurse more than one level deep here.
1671 */
1673 Assert(subtxn->nsubtxns == 0);
1674
1677 }
1678
1679 /* cleanup changes in the txn */
1680 dlist_foreach_modify(iter, &txn->changes)
1681 {
1682 ReorderBufferChange *change;
1683
1684 change = dlist_container(ReorderBufferChange, node, iter.cur);
1685
1686 /* Check we're not mixing changes from different transactions. */
1687 Assert(change->txn == txn);
1688
1689 /* remove the change from its containing list */
1690 dlist_delete(&change->node);
1691
1692 /*
1693 * Instead of updating the memory counter for individual changes, we
1694 * sum up the size of memory to free so we can update the memory
1695 * counter all together below. This saves costs of maintaining the
1696 * max-heap.
1697 */
1699
1700 ReorderBufferFreeChange(rb, change, false);
1701 }
1702
1703 /* Update the memory counter */
1705
1706 if (txn_prepared)
1707 {
1708 /*
1709 * If this is a prepared txn, cleanup the tuplecids we stored for
1710 * decoding catalog snapshot access. They are always stored in the
1711 * toplevel transaction.
1712 */
1713 dlist_foreach_modify(iter, &txn->tuplecids)
1714 {
1715 ReorderBufferChange *change;
1716
1717 change = dlist_container(ReorderBufferChange, node, iter.cur);
1718
1719 /* Check we're not mixing changes from different transactions. */
1720 Assert(change->txn == txn);
1722
1723 /* Remove the change from its containing list. */
1724 dlist_delete(&change->node);
1725
1726 ReorderBufferFreeChange(rb, change, true);
1727 }
1728 }
1729
1730 /*
1731 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1732 * memory. We could also keep the hash table and update it with new ctid
1733 * values, but this seems simpler and good enough for now.
1734 */
1735 if (txn->tuplecid_hash != NULL)
1736 {
1738 txn->tuplecid_hash = NULL;
1739 }
1740
1741 /* If this txn is serialized then clean the disk space. */
1742 if (rbtxn_is_serialized(txn))
1743 {
1746
1747 /*
1748 * We set this flag to indicate if the transaction is ever serialized.
1749 * We need this to accurately update the stats as otherwise the same
1750 * transaction can be counted as serialized multiple times.
1751 */
1753 }
1754
1755 /* also reset the number of entries in the transaction */
1756 txn->nentries_mem = 0;
1757 txn->nentries = 0;
1758}
1759
1760/*
1761 * Check the transaction status by CLOG lookup and discard all changes if
1762 * the transaction is aborted. The transaction status is cached in
1763 * txn->txn_flags so we can skip future changes and avoid CLOG lookups on the
1764 * next call.
1765 *
1766 * Return true if the transaction is aborted, otherwise return false.
1767 *
1768 * When the 'debug_logical_replication_streaming' is set to "immediate", we
1769 * don't check the transaction status, meaning the caller will always process
1770 * this transaction.
1771 */
1772static bool
1774{
1775 /* Quick return for regression tests */
1777 return false;
1778
1779 /*
1780 * Quick return if the transaction status is already known.
1781 */
1782
1783 if (rbtxn_is_committed(txn))
1784 return false;
1785 if (rbtxn_is_aborted(txn))
1786 {
1787 /* Already-aborted transactions should not have any changes */
1788 Assert(txn->size == 0);
1789
1790 return true;
1791 }
1792
1793 /* Otherwise, check the transaction status using CLOG lookup */
1794
1796 return false;
1797
1798 if (TransactionIdDidCommit(txn->xid))
1799 {
1800 /*
1801 * Remember the transaction is committed so that we can skip CLOG
1802 * check next time, avoiding the pressure on CLOG lookup.
1803 */
1804 Assert(!rbtxn_is_aborted(txn));
1806 return false;
1807 }
1808
1809 /*
1810 * The transaction aborted. We discard both the changes collected so far
1811 * and the toast reconstruction data. The full cleanup will happen as part
1812 * of decoding ABORT record of this transaction.
1813 */
1816
1817 /* All changes should be discarded */
1818 Assert(txn->size == 0);
1819
1820 /*
1821 * Mark the transaction as aborted so we can ignore future changes of this
1822 * transaction.
1823 */
1826
1827 return true;
1828}
1829
1830/*
1831 * Build a hash with a (relfilelocator, ctid) -> (cmin, cmax) mapping for use by
1832 * HeapTupleSatisfiesHistoricMVCC.
1833 */
1834static void
1836{
1837 dlist_iter iter;
1839
1841 return;
1842
1844 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1845 hash_ctl.hcxt = rb->context;
1846
1847 /*
1848 * create the hash with the exact number of to-be-stored tuplecids from
1849 * the start
1850 */
1851 txn->tuplecid_hash =
1852 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1854
1855 dlist_foreach(iter, &txn->tuplecids)
1856 {
1859 bool found;
1860 ReorderBufferChange *change;
1861
1862 change = dlist_container(ReorderBufferChange, node, iter.cur);
1863
1865
1866 /* be careful about padding */
1867 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1868
1869 key.rlocator = change->data.tuplecid.locator;
1870
1872 &key.tid);
1873
1875 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1876 if (!found)
1877 {
1878 ent->cmin = change->data.tuplecid.cmin;
1879 ent->cmax = change->data.tuplecid.cmax;
1880 ent->combocid = change->data.tuplecid.combocid;
1881 }
1882 else
1883 {
1884 /*
1885 * Maybe we already saw this tuple before in this transaction, but
1886 * if so it must have the same cmin.
1887 */
1888 Assert(ent->cmin == change->data.tuplecid.cmin);
1889
1890 /*
1891 * cmax may be initially invalid, but once set it can only grow,
1892 * and never become invalid again.
1893 */
1894 Assert((ent->cmax == InvalidCommandId) ||
1895 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1896 (change->data.tuplecid.cmax > ent->cmax)));
1897 ent->cmax = change->data.tuplecid.cmax;
1898 }
1899 }
1900}
1901
1902/*
1903 * Copy a provided snapshot so we can modify it privately. This is needed so
1904 * that catalog modifying transactions can look into intermediate catalog
1905 * states.
1906 */
1907static Snapshot
1910{
1911 Snapshot snap;
1912 dlist_iter iter;
1913 int i = 0;
1914 Size size;
1915
1916 size = sizeof(SnapshotData) +
1917 sizeof(TransactionId) * orig_snap->xcnt +
1918 sizeof(TransactionId) * (txn->nsubtxns + 1);
1919
1920 snap = MemoryContextAllocZero(rb->context, size);
1921 memcpy(snap, orig_snap, sizeof(SnapshotData));
1922
1923 snap->copied = true;
1924 snap->active_count = 1; /* mark as active so nobody frees it */
1925 snap->regd_count = 0;
1926 snap->xip = (TransactionId *) (snap + 1);
1927
1928 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1929
1930 /*
1931 * snap->subxip contains all txids that belong to our transaction which we
1932 * need to check via cmin/cmax. That's why we store the toplevel
1933 * transaction in there as well.
1934 */
1935 snap->subxip = snap->xip + snap->xcnt;
1936 snap->subxip[i++] = txn->xid;
1937
1938 /*
1939 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1940 * manually. Since it's an upper boundary it is safe to use it for the
1941 * allocation above.
1942 */
1943 snap->subxcnt = 1;
1944
1945 dlist_foreach(iter, &txn->subtxns)
1946 {
1948
1950 snap->subxip[i++] = sub_txn->xid;
1951 snap->subxcnt++;
1952 }
1953
1954 /* sort so we can bsearch() later */
1955 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1956
1957 /* store the specified current CommandId */
1958 snap->curcid = cid;
1959
1960 return snap;
1961}
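
/*
 * Illustrative sketch (not from reorderbuffer.c): the copied snapshot's
 * subxip array is qsort()ed with xidComparator() above precisely so that
 * later membership tests can use bsearch(), the same way this file's
 * TransactionIdInArray() does.  xidComparator() is an existing PostgreSQL
 * helper; demo_xid_in_sorted_array() is hypothetical.
 */
static bool
demo_xid_in_sorted_array(TransactionId xid, TransactionId *xip, Size num)
{
	return bsearch(&xid, xip, num, sizeof(TransactionId),
				   xidComparator) != NULL;
}
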
1962
1963/*
1964 * Free a previously ReorderBufferCopySnap'ed snapshot
1965 */
1966static void
1968{
1969 if (snap->copied)
1970 pfree(snap);
1971 else
1973}
1974
1975/*
1976 * If the transaction was (partially) streamed, we need to prepare or commit
1977 * it in a 'streamed' way. That is, we first stream the remaining part of the
1978 * transaction, and then invoke stream_prepare or stream_commit message as per
1979 * the case.
1980 */
1981static void
1983{
1984 /* we should only call this for previously streamed transactions */
1986
1988
1989 if (rbtxn_is_prepared(txn))
1990 {
1991 /*
1992 * Note, we send stream prepare even if a concurrent abort is
1993 * detected. See DecodePrepare for more information.
1994 */
1996 rb->stream_prepare(rb, txn, txn->final_lsn);
1998
1999 /*
2000 * This is a PREPARED transaction, part of a two-phase commit. The
2001 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2002 * just truncate txn by removing changes and tuplecids.
2003 */
2004 ReorderBufferTruncateTXN(rb, txn, true);
2005 /* Reset the CheckXidAlive */
2007 }
2008 else
2009 {
2010 rb->stream_commit(rb, txn, txn->final_lsn);
2012 }
2013}
2014
2015/*
2016 * Set xid to detect concurrent aborts.
2017 *
2018 * While streaming an in-progress transaction or decoding a prepared
2019 * transaction there is a possibility that the (sub)transaction might get
2020 * aborted concurrently. In such case if the (sub)transaction has catalog
2021 * update then we might decode the tuple using wrong catalog version. For
2022 * example, suppose there is one catalog tuple with (xmin: 500, xmax: 0). Now,
2023 * the transaction 501 updates the catalog tuple and after that we will have
2024 * two tuples (xmin: 500, xmax: 501) and (xmin: 501, xmax: 0). Now, if 501 is
2025 * aborted and some other transaction say 502 updates the same catalog tuple
2026 * then the first tuple will be changed to (xmin: 500, xmax: 502). So, the
2027 * problem is that when we try to decode the tuple inserted/updated in 501
2028 * after the catalog update, we will see the catalog tuple with (xmin: 500,
2029 * xmax: 502) as visible because it will consider that the tuple is deleted by
2030 * xid 502 which is not visible to our snapshot. And when we will try to
2031 * decode with that catalog tuple, it can lead to a wrong result or a crash.
2032 * So, it is necessary to detect concurrent aborts to allow streaming of
2033 * in-progress transactions or decoding of prepared transactions.
2034 *
2035 * For detecting the concurrent abort we set CheckXidAlive to the current
2036 * (sub)transaction's xid for which this change belongs to. And, during
2037 * catalog scan we can check the status of the xid and if it is aborted we will
2038 * report a specific error so that we can stop streaming current transaction
2039 * and discard the already streamed changes on such an error. We might have
2040 * already streamed some of the changes for the aborted (sub)transaction, but
2041 * that is fine because when we decode the abort we will stream abort message
2042 * to truncate the changes in the subscriber. Similarly, for prepared
2043 * transactions, we stop decoding if concurrent abort is detected and then
2044 * rollback the changes when rollback prepared is encountered. See
2045 * DecodePrepare.
2046 */
2047static inline void
2049{
2050 /*
2051 * If the input transaction id is already set as a CheckXidAlive then
2052 * nothing to do.
2053 */
2055 return;
2056
2057 /*
2058 * setup CheckXidAlive if it's not committed yet. We don't check if the
2059 * xid is aborted. That will happen during catalog access.
2060 */
2061 if (!TransactionIdDidCommit(xid))
2062 CheckXidAlive = xid;
2063 else
2065}
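
/*
 * Illustrative sketch (not from reorderbuffer.c): the kind of test catalog
 * access code can perform against CheckXidAlive to detect a concurrent abort
 * while streaming an in-progress transaction or decoding a prepared one.
 * TransactionIdIsValid(), TransactionIdIsInProgress(),
 * TransactionIdDidCommit() and ereport() are existing PostgreSQL APIs;
 * demo_check_concurrent_abort() is hypothetical and only approximates the
 * check performed during system catalog scans.
 */
static void
demo_check_concurrent_abort(void)
{
	if (TransactionIdIsValid(CheckXidAlive) &&
		!TransactionIdIsInProgress(CheckXidAlive) &&
		!TransactionIdDidCommit(CheckXidAlive))
		ereport(ERROR,
				(errcode(ERRCODE_TRANSACTION_ROLLBACK),
				 errmsg("transaction aborted during system catalog scan")));
}
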
2066
2067/*
2068 * Helper function for ReorderBufferProcessTXN for applying change.
2069 */
2070static inline void
2072 Relation relation, ReorderBufferChange *change,
2073 bool streaming)
2074{
2075 if (streaming)
2076 rb->stream_change(rb, txn, relation, change);
2077 else
2078 rb->apply_change(rb, txn, relation, change);
2079}
2080
2081/*
2082 * Helper function for ReorderBufferProcessTXN for applying the truncate.
2083 */
2084static inline void
2086 int nrelations, Relation *relations,
2087 ReorderBufferChange *change, bool streaming)
2088{
2089 if (streaming)
2090 rb->stream_truncate(rb, txn, nrelations, relations, change);
2091 else
2092 rb->apply_truncate(rb, txn, nrelations, relations, change);
2093}
2094
2095/*
2096 * Helper function for ReorderBufferProcessTXN for applying the message.
2097 */
2098static inline void
2100 ReorderBufferChange *change, bool streaming)
2101{
2102 if (streaming)
2103 rb->stream_message(rb, txn, change->lsn, true,
2104 change->data.msg.prefix,
2105 change->data.msg.message_size,
2106 change->data.msg.message);
2107 else
2108 rb->message(rb, txn, change->lsn, true,
2109 change->data.msg.prefix,
2110 change->data.msg.message_size,
2111 change->data.msg.message);
2112}
2113
2114/*
2115 * Function to store the command id and snapshot at the end of the current
2116 * stream so that we can reuse the same while sending the next stream.
2117 */
2118static inline void
2120 Snapshot snapshot_now, CommandId command_id)
2121{
2122 txn->command_id = command_id;
2123
2124 /* Avoid copying if it's already copied. */
2125 if (snapshot_now->copied)
2126 txn->snapshot_now = snapshot_now;
2127 else
2128 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2129 txn, command_id);
2130}
2131
2132/*
2133 * Mark the given transaction as streamed if it's a top-level transaction
2134 * or has changes.
2135 */
2136static void
2138{
2139 /*
2140 * The top-level transaction, is marked as streamed always, even if it
2141 * does not contain any changes (that is, when all the changes are in
2142 * subtransactions).
2143 *
2144 * For subtransactions, we only mark them as streamed when there are
2145 * changes in them.
2146 *
2147 * We do it this way because of aborts - we don't want to send aborts for
2148 * XIDs the downstream is not aware of. And of course, it always knows
2149 * about the top-level xact (we send the XID in all messages), but we
2150 * never stream XIDs of empty subxacts.
2151 */
2152 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2153 txn->txn_flags |= RBTXN_IS_STREAMED;
2154}
2155
2156/*
2157 * Helper function for ReorderBufferProcessTXN to handle the concurrent
2158 * abort of the streaming transaction. This resets the TXN such that it
2159 * can be used to stream the remaining data of the transaction being
2160 * processed. This can happen when a subtransaction is aborted and we still
2161 * want to continue processing the data of the main or other subtransactions.
2162 */
2163static void
2164ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
2165 Snapshot snapshot_now,
2166 CommandId command_id,
2167 XLogRecPtr last_lsn,
2168 ReorderBufferChange *specinsert)
2169{
2170 /* Discard the changes that we just streamed */
2172
2173 /* Free all resources allocated for toast reconstruction */
2175
2176 /* Return the spec insert change if it is not NULL */
2177 if (specinsert != NULL)
2178 {
2180 specinsert = NULL;
2181 }
2182
2183 /*
2184 * For the streaming case, stop the stream and remember the command ID and
2185 * snapshot for the streaming run.
2186 */
2187 if (rbtxn_is_streamed(txn))
2188 {
2189 rb->stream_stop(rb, txn, last_lsn);
2190 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2191 }
2192
2193 /* All changes must be deallocated */
2194 Assert(txn->size == 0);
2195}
2196
2197/*
2198 * Helper function for ReorderBufferReplay and ReorderBufferStreamTXN.
2199 *
2200 * Send data of a transaction (and its subtransactions) to the
2201 * output plugin. We iterate over the top and subtransactions (using a k-way
2202 * merge) and replay the changes in lsn order.
2203 *
2204 * If streaming is true then data will be sent using the streaming API.
2205 *
2206 * Note: "volatile" markers on some parameters are to avoid trouble with
2207 * PG_TRY inside the function.
2208 */
2209static void
2210ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
2211 XLogRecPtr commit_lsn,
2212 volatile Snapshot snapshot_now,
2213 volatile CommandId command_id,
2214 bool streaming)
2215{
2216 bool using_subtxn;
2222 volatile bool stream_started = false;
2223 ReorderBufferTXN *volatile curtxn = NULL;
2224
2225 /* build data to be able to lookup the CommandIds of catalog tuples */
2227
2228 /* setup the initial snapshot */
2229 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2230
2231 /*
2232 * Decoding needs access to syscaches et al., which in turn use
2233 * heavyweight locks and such. Thus we need to have enough state around to
2234 * keep track of those. The easiest way is to simply use a transaction
2235 * internally. That also allows us to easily enforce that nothing writes
2236 * to the database by checking for xid assignments.
2237 *
2238 * When we're called via the SQL SRF there's already a transaction
2239 * started, so start an explicit subtransaction there.
2240 */
2242
2243 PG_TRY();
2244 {
2245 ReorderBufferChange *change;
2246 int changes_count = 0; /* used to accumulate the number of
2247 * changes */
2248
2249 if (using_subtxn)
2250 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2251 else
2253
2254 /*
2255 * We only need to send begin/begin-prepare for non-streamed
2256 * transactions.
2257 */
2258 if (!streaming)
2259 {
2260 if (rbtxn_is_prepared(txn))
2261 rb->begin_prepare(rb, txn);
2262 else
2263 rb->begin(rb, txn);
2264 }
2265
2267 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2268 {
2269 Relation relation = NULL;
2270 Oid reloid;
2271
2273
2274 /*
2275 * We can't call the stream_start callback before processing the first
2276 * change.
2277 */
2279 {
2280 if (streaming)
2281 {
2282 txn->origin_id = change->origin_id;
2283 rb->stream_start(rb, txn, change->lsn);
2284 stream_started = true;
2285 }
2286 }
2287
2288 /*
2289 * Enforce correct ordering of changes, merged from multiple
2290 * subtransactions. The changes may have the same LSN due to
2291 * MULTI_INSERT xlog records.
2292 */
2294
2295 prev_lsn = change->lsn;
2296
2297 /*
2298 * Set the current xid to detect concurrent aborts. This is
2299 * required for the cases when we decode the changes before the
2300 * COMMIT record is processed.
2301 */
2302 if (streaming || rbtxn_is_prepared(change->txn))
2303 {
2304 curtxn = change->txn;
2306 }
2307
2308 switch (change->action)
2309 {
2311
2312 /*
2313 * Confirmation for speculative insertion arrived. Simply
2314 * use as a normal record. It'll be cleaned up at the end
2315 * of INSERT processing.
2316 */
2317 if (specinsert == NULL)
2318 elog(ERROR, "invalid ordering of speculative insertion changes");
2319 Assert(specinsert->data.tp.oldtuple == NULL);
2320 change = specinsert;
2322
2323 /* intentionally fall through */
2327 Assert(snapshot_now);
2328
2329 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2330 change->data.tp.rlocator.relNumber);
2331
2332 /*
2333 * Mapped catalog tuple without data, emitted while
2334 * catalog table was in the process of being rewritten. We
2335 * can fail to look up the relfilenumber, because the
2336 * relmapper has no "historic" view, in contrast to the
2337 * normal catalog during decoding. Thus repeated rewrites
2338 * can cause a lookup failure. That's OK because we do not
2339 * decode catalog changes anyway. Normally such tuples
2340 * would be skipped over below, but we can't identify
2341 * whether the table should be logically logged without
2342 * mapping the relfilenumber to the oid.
2343 */
2344 if (reloid == InvalidOid &&
2345 change->data.tp.newtuple == NULL &&
2346 change->data.tp.oldtuple == NULL)
2347 goto change_done;
2348 else if (reloid == InvalidOid)
2349 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2350 relpathperm(change->data.tp.rlocator,
2351 MAIN_FORKNUM).str);
2352
2353 relation = RelationIdGetRelation(reloid);
2354
2355 if (!RelationIsValid(relation))
2356 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2357 reloid,
2358 relpathperm(change->data.tp.rlocator,
2359 MAIN_FORKNUM).str);
2360
2361 if (!RelationIsLogicallyLogged(relation))
2362 goto change_done;
2363
2364 /*
2365 * Ignore temporary heaps created during DDL unless the
2366 * plugin has asked for them.
2367 */
2368 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2369 goto change_done;
2370
2371 /*
2372 * For now ignore sequence changes entirely. Most of the
2373 * time they don't log changes using records we
2374 * understand, so it doesn't make sense to handle the few
2375 * cases we do.
2376 */
2377 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2378 goto change_done;
2379
2380 /* user-triggered change */
2381 if (!IsToastRelation(relation))
2382 {
2383 ReorderBufferToastReplace(rb, txn, relation, change);
2384 ReorderBufferApplyChange(rb, txn, relation, change,
2385 streaming);
2386
2387 /*
2388 * Only clear reassembled toast chunks if we're sure
2389 * they're not required anymore. The creator of the
2390 * tuple tells us.
2391 */
2392 if (change->data.tp.clear_toast_afterwards)
2394 }
2395 /* we're not interested in toast deletions */
2396 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2397 {
2398 /*
2399 * Need to reassemble the full toasted Datum in
2400 * memory, to ensure the chunks don't get reused till
2401 * we're done; remove it from the list of this
2402 * transaction's changes. Otherwise it will get
2403 * freed/reused while restoring spooled data from
2404 * disk.
2405 */
2406 Assert(change->data.tp.newtuple != NULL);
2407
2408 dlist_delete(&change->node);
2409 ReorderBufferToastAppendChunk(rb, txn, relation,
2410 change);
2411 }
2412
2414
2415 /*
2416 * If speculative insertion was confirmed, the record
2417 * isn't needed anymore.
2418 */
2419 if (specinsert != NULL)
2420 {
2422 specinsert = NULL;
2423 }
2424
2425 if (RelationIsValid(relation))
2426 {
2427 RelationClose(relation);
2428 relation = NULL;
2429 }
2430 break;
2431
2433
2434 /*
2435 * Speculative insertions are dealt with by delaying the
2436 * processing of the insert until the confirmation record
2437 * arrives. For that we simply unlink the record from the
2438 * chain, so it does not get freed/reused while restoring
2439 * spooled data from disk.
2440 *
2441 * This is safe in the face of concurrent catalog changes
2442 * because the relevant relation can't be changed between
2443 * speculative insertion and confirmation due to
2444 * CheckTableNotInUse() and locking.
2445 */
2446
2447 /* clear out a pending (and thus failed) speculation */
2448 if (specinsert != NULL)
2449 {
2451 specinsert = NULL;
2452 }
2453
2454 /* and memorize the pending insertion */
2455 dlist_delete(&change->node);
2456 specinsert = change;
2457 break;
2458
2460
2461 /*
2462 * Abort for speculative insertion arrived. So clean up the
2463 * specinsert tuple and toast hash.
2464 *
2465 * Note that we get the spec abort change for each toast
2466 * entry, but we need to perform the cleanup only the first
2467 * time we get it for the main table.
2468 */
2469 if (specinsert != NULL)
2470 {
2471 /*
2472 * We must clean the toast hash before processing a
2473 * completely new tuple to avoid confusion about the
2474 * previous tuple's toast chunks.
2475 */
2478
2479 /* We don't need this record anymore. */
2481 specinsert = NULL;
2482 }
2483 break;
2484
2486 {
2487 int i;
2488 int nrelids = change->data.truncate.nrelids;
2489 int nrelations = 0;
2490 Relation *relations;
2491
2492 relations = palloc0(nrelids * sizeof(Relation));
2493 for (i = 0; i < nrelids; i++)
2494 {
2495 Oid relid = change->data.truncate.relids[i];
2496 Relation rel;
2497
2498 rel = RelationIdGetRelation(relid);
2499
2500 if (!RelationIsValid(rel))
2501 elog(ERROR, "could not open relation with OID %u", relid);
2502
2503 if (!RelationIsLogicallyLogged(rel))
2504 continue;
2505
2506 relations[nrelations++] = rel;
2507 }
2508
2509 /* Apply the truncate. */
2511 relations, change,
2512 streaming);
2513
2514 for (i = 0; i < nrelations; i++)
2515 RelationClose(relations[i]);
2516
2517 break;
2518 }
2519
2521 ReorderBufferApplyMessage(rb, txn, change, streaming);
2522 break;
2523
2525 /* Execute the invalidation messages locally */
2527 change->data.inval.invalidations);
2528 break;
2529
2531 /* get rid of the old */
2533
2534 if (snapshot_now->copied)
2535 {
2536 ReorderBufferFreeSnap(rb, snapshot_now);
2537 snapshot_now =
2539 txn, command_id);
2540 }
2541
2542 /*
2543 * Restored from disk, need to be careful not to double
2544 * free. We could introduce refcounting for that, but for
2545 * now this seems infrequent enough not to care.
2546 */
2547 else if (change->data.snapshot->copied)
2548 {
2549 snapshot_now =
2551 txn, command_id);
2552 }
2553 else
2554 {
2555 snapshot_now = change->data.snapshot;
2556 }
2557
2558 /* and continue with the new one */
2559 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2560 break;
2561
2564
2565 if (command_id < change->data.command_id)
2566 {
2567 command_id = change->data.command_id;
2568
2569 if (!snapshot_now->copied)
2570 {
2571 /* we don't use the global one anymore */
2572 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2573 txn, command_id);
2574 }
2575
2576 snapshot_now->curcid = command_id;
2577
2579 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2580 }
2581
2582 break;
2583
2585 elog(ERROR, "tuplecid value in changequeue");
2586 break;
2587 }
2588
2589 /*
2590 * It is possible that the data is not sent to downstream for a
2591 * long time either because the output plugin filtered it or there
2592 * is a DDL that generates a lot of data that is not processed by
2594 * the plugin. So, in such cases, the downstream can time out. To
2594 * avoid that we try to send a keepalive message if required.
2595 * Trying to send a keepalive message after every change has some
2596 * overhead, but testing showed there is no noticeable overhead if
2597 * we do it after every ~100 changes.
2598 */
2599#define CHANGES_THRESHOLD 100
2600
2601 if (++changes_count >= CHANGES_THRESHOLD)
2602 {
2603 rb->update_progress_txn(rb, txn, prev_lsn);
2604 changes_count = 0;
2605 }
2606 }
2607
2608 /* speculative insertion record must be freed by now */
2610
2611 /* clean up the iterator */
2613 iterstate = NULL;
2614
2615 /*
2616 * Update total transaction count and total bytes processed by the
2617 * transaction and its subtransactions. Make sure not to count a
2618 * streamed transaction multiple times.
2619 *
2620 * Note that the statistics computation has to be done after
2621 * ReorderBufferIterTXNFinish as it releases the serialized change
2622 * which we have already accounted in ReorderBufferIterTXNNext.
2623 */
2624 if (!rbtxn_is_streamed(txn))
2625 rb->totalTxns++;
2626
2627 rb->totalBytes += txn->total_size;
2628
2629 /*
2630 * Done with current changes, send the last message for this set of
2631 * changes depending upon streaming mode.
2632 */
2633 if (streaming)
2634 {
2635 if (stream_started)
2636 {
2637 rb->stream_stop(rb, txn, prev_lsn);
2638 stream_started = false;
2639 }
2640 }
2641 else
2642 {
2643 /*
2644 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2645 * regular ones).
2646 */
2647 if (rbtxn_is_prepared(txn))
2648 {
2650 rb->prepare(rb, txn, commit_lsn);
2652 }
2653 else
2654 rb->commit(rb, txn, commit_lsn);
2655 }
2656
2657 /* this is just a sanity check against bad output plugin behaviour */
2659 elog(ERROR, "output plugin used XID %u",
2661
2662 /*
2663 * Remember the command ID and snapshot for the next set of changes in
2664 * streaming mode.
2665 */
2666 if (streaming)
2667 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2668 else if (snapshot_now->copied)
2669 ReorderBufferFreeSnap(rb, snapshot_now);
2670
2671 /* cleanup */
2673
2674 /*
2675 * Aborting the current (sub-)transaction as a whole has the right
2676 * semantics. We want all locks acquired in here to be released, not
2677 * reassigned to the parent, and we do not want any database access to
2678 * have persistent effects.
2679 */
2681
2682 /* make sure there's no cache pollution */
2684 {
2687 }
2688 else
2689 {
2693 }
2694
2695 if (using_subtxn)
2696 {
2699 CurrentResourceOwner = cowner;
2700 }
2701
2702 /*
2703 * We are here due to one of four reasons: 1. Decoding an
2704 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2705 * prepared txn that was (partially) streamed. 4. Decoding a committed
2706 * txn.
2707 *
2708 * For 1, we allow truncation of txn data by removing the changes
2709 * already streamed but still keeping other things like invalidations,
2710 * snapshot, and tuplecids. For 2 and 3, we indicate
2711 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2712 * data as the entire transaction has been decoded except for commit.
2713 * For 4, as the entire txn has been decoded, we can fully clean up
2714 * the TXN reorder buffer.
2715 */
2716 if (streaming || rbtxn_is_prepared(txn))
2717 {
2718 if (streaming)
2720
2722 /* Reset the CheckXidAlive */
2724 }
2725 else
2727 }
2728 PG_CATCH();
2729 {
2732
2733 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2734 if (iterstate)
2736
2738
2739 /*
2740 * Force cache invalidation to happen outside of a valid transaction
2741 * to prevent catalog access as we just caught an error.
2742 */
2744
2745 /* make sure there's no cache pollution */
2747 {
2750 }
2751 else
2752 {
2756 }
2757
2758 if (using_subtxn)
2759 {
2762 CurrentResourceOwner = cowner;
2763 }
2764
2765 /*
2766 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2767 * abort of the (sub)transaction we are streaming or preparing. We
2768 * need to do the cleanup and return gracefully on this error, see
2769 * SetupCheckXidLive.
2770 *
2771 * This error code can be thrown by one of the callbacks we call
2772 * during decoding so we need to ensure that we return gracefully only
2773 * when we are sending the data in streaming mode and the streaming is
2774 * not finished yet or when we are sending the data out on a PREPARE
2775 * during a two-phase commit.
2776 */
2777 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2779 {
2780 /* curtxn must be set for streaming or prepared transactions */
2781 Assert(curtxn);
2782
2783 /* Cleanup the temporary error state. */
2786 errdata = NULL;
2787
2788 /* Remember the transaction is aborted. */
2790 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2791
2792 /* Mark the transaction as streamed if appropriate */
2793 if (stream_started)
2795
2796 /* Reset the TXN so that it is allowed to stream remaining data. */
2797 ReorderBufferResetTXN(rb, txn, snapshot_now,
2798 command_id, prev_lsn,
2799 specinsert);
2800 }
2801 else
2802 {
2805 PG_RE_THROW();
2806 }
2807 }
2808 PG_END_TRY();
2809}
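/*
 * Editor-added summary (derived from the function above; simplified, error
 * paths omitted) of the callback sequences ReorderBufferProcessTXN drives:
 *
 *   non-streaming commit:  begin -> {change | truncate | message}... -> commit
 *   non-streaming 2PC:     begin_prepare -> {change | ...}... -> prepare
 *   streaming run:         stream_start (before the first change)
 *                            -> {stream_change | stream_truncate | stream_message}...
 *                            -> stream_stop
 */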
2810
2811/*
2812 * Perform the replay of a transaction and its non-aborted subtransactions.
2813 *
2814 * Subtransactions have to be processed beforehand by
2815 * ReorderBufferCommitChild(), even if they were previously assigned to the
2816 * toplevel transaction with ReorderBufferAssignChild().
2817 *
2818 * This interface is called once a prepare or toplevel commit is read for both
2819 * streamed as well as non-streamed transactions.
2820 */
2821static void
2822ReorderBufferReplay(ReorderBufferTXN *txn,
2823 ReorderBuffer *rb, TransactionId xid,
2824 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2825 TimestampTz commit_time,
2826 RepOriginId origin_id, XLogRecPtr origin_lsn)
2827{
2828 Snapshot snapshot_now;
2829 CommandId command_id = FirstCommandId;
2830
2831 txn->final_lsn = commit_lsn;
2832 txn->end_lsn = end_lsn;
2833 txn->commit_time = commit_time;
2834 txn->origin_id = origin_id;
2835 txn->origin_lsn = origin_lsn;
2836
2837 /*
2838 * If the transaction was (partially) streamed, we need to commit it in a
2839 * 'streamed' way. That is, we first stream the remaining part of the
2840 * transaction, and then invoke the stream_commit callback.
2841 *
2842 * Called after everything (origin ID, LSN, ...) is stored in the
2843 * transaction to avoid passing that information directly.
2844 */
2845 if (rbtxn_is_streamed(txn))
2846 {
2848 return;
2849 }
2850
2851 /*
2852 * If this transaction has no snapshot, it didn't make any changes to the
2853 * database, so there's nothing to decode. Note that
2854 * ReorderBufferCommitChild will have transferred any snapshots from
2855 * subtransactions if there were any.
2856 */
2857 if (txn->base_snapshot == NULL)
2858 {
2859 Assert(txn->ninvalidations == 0);
2860
2861 /*
2862 * Removing this txn before a commit might result in the computation
2863 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2864 */
2865 if (!rbtxn_is_prepared(txn))
2867 return;
2868 }
2869
2870 snapshot_now = txn->base_snapshot;
2871
2872 /* Process and send the changes to output plugin. */
2873 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2874 command_id, false);
2875}
2876
2877/*
2878 * Commit a transaction.
2879 *
2880 * See comments for ReorderBufferReplay().
2881 */
2882void
2883ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
2884 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2885 TimestampTz commit_time,
2886 RepOriginId origin_id, XLogRecPtr origin_lsn)
2887{
2888 ReorderBufferTXN *txn;
2889
2890 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2891 false);
2892
2893 /* unknown transaction, nothing to replay */
2894 if (txn == NULL)
2895 return;
2896
2897 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2898 origin_id, origin_lsn);
2899}
2900
2901/*
2902 * Record the prepare information for a transaction. Also, mark the transaction
2903 * as a prepared transaction.
2904 */
2905bool
2906ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid,
2907 XLogRecPtr prepare_lsn, XLogRecPtr end_lsn,
2908 TimestampTz prepare_time,
2909 RepOriginId origin_id, XLogRecPtr origin_lsn)
2910{
2911 ReorderBufferTXN *txn;
2912
2913 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2914
2915 /* unknown transaction, nothing to do */
2916 if (txn == NULL)
2917 return false;
2918
2919 /*
2920 * Remember the prepare information so it can be used later by commit
2921 * prepared, in case we skip doing the prepare here.
2922 */
2923 txn->final_lsn = prepare_lsn;
2924 txn->end_lsn = end_lsn;
2925 txn->prepare_time = prepare_time;
2926 txn->origin_id = origin_id;
2927 txn->origin_lsn = origin_lsn;
2928
2929 /* Mark this transaction as a prepared transaction */
2932
2933 return true;
2934}
2935
2936/* Remember that we have skipped prepare */
2937void
2938ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid)
2939{
2940 ReorderBufferTXN *txn;
2941
2942 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2943
2944 /* unknown transaction, nothing to do */
2945 if (txn == NULL)
2946 return;
2947
2948 /* txn must have been marked as a prepared transaction */
2951}
2952
2953/*
2954 * Prepare a two-phase transaction.
2955 *
2956 * See comments for ReorderBufferReplay().
2957 */
2958void
2959ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid,
2960 char *gid)
2961{
2962 ReorderBufferTXN *txn;
2963
2964 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2965 false);
2966
2967 /* unknown transaction, nothing to replay */
2968 if (txn == NULL)
2969 return;
2970
2971 /*
2972 * txn must have been marked as a prepared transaction and must have
2973 * neither been skipped nor sent a prepare. Also, the prepare info must
2974 * have been updated in it by now.
2975 */
2978
2979 txn->gid = pstrdup(gid);
2980
2981 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2982 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2983
2984 /*
2985 * Send a prepare if not already done so. This might occur if we have
2986 * detected a concurrent abort while replaying the non-streaming
2987 * transaction.
2988 */
2989 if (!rbtxn_sent_prepare(txn))
2990 {
2991 rb->prepare(rb, txn, txn->final_lsn);
2993 }
2994}
2995
2996/*
2997 * This is used to handle COMMIT/ROLLBACK PREPARED.
2998 */
2999void
3000ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid,
3001 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
3002 XLogRecPtr two_phase_at,
3003 TimestampTz commit_time, RepOriginId origin_id,
3004 XLogRecPtr origin_lsn, char *gid, bool is_commit)
3005{
3006 ReorderBufferTXN *txn;
3007 XLogRecPtr prepare_end_lsn;
3008 TimestampTz prepare_time;
3009
3010 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3011
3012 /* unknown transaction, nothing to do */
3013 if (txn == NULL)
3014 return;
3015
3016 /*
3017 * By this time the txn has the prepare record information; remember it so
3018 * it can be used later for rollback.
3019 */
3020 prepare_end_lsn = txn->end_lsn;
3021 prepare_time = txn->prepare_time;
3022
3023 /* add the gid in the txn */
3024 txn->gid = pstrdup(gid);
3025
3026 /*
3027 * It is possible that this transaction is not decoded at prepare time
3028 * either because by that time we didn't have a consistent snapshot, or
3029 * two_phase was not enabled, or it was decoded earlier but we have
3030 * restarted. We only need to send the prepare if it was not decoded
3031 * earlier. We don't need to decode the xact for aborts if it is not done
3032 * already.
3033 */
3034 if ((txn->final_lsn < two_phase_at) && is_commit)
3035 {
3036 /*
3037 * txn must have been marked as a prepared transaction and skipped but
3038 * not sent a prepare. Also, the prepare info must have been updated
3039 * in txn even if we skip prepare.
3040 */
3044
3045 /*
3046 * By this time the txn has the prepare record information and it is
3047 * important to use that so that downstream gets the accurate
3048 * information. If we passed the commit information here instead, the
3049 * downstream could behave as if it had already replayed commit
3050 * prepared after the restart.
3051 */
3052 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3053 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3054 }
3055
3056 txn->final_lsn = commit_lsn;
3057 txn->end_lsn = end_lsn;
3058 txn->commit_time = commit_time;
3059 txn->origin_id = origin_id;
3060 txn->origin_lsn = origin_lsn;
3061
3062 if (is_commit)
3063 rb->commit_prepared(rb, txn, commit_lsn);
3064 else
3065 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3066
3067 /* cleanup: make sure there's no cache pollution */
3069 txn->invalidations);
3071}
3072
3073/*
3074 * Abort a transaction that possibly has previous changes. Needs to be first
3075 * called for subtransactions and then for the toplevel xid.
3076 *
3077 * NB: Transactions handled here have to have actively aborted (i.e. have
3078 * produced an abort record). Implicitly aborted transactions are handled via
3079 * ReorderBufferAbortOld(); transactions we're just not interested in, but
3080 * which have committed are handled in ReorderBufferForget().
3081 *
3082 * This function purges this transaction and its contents from memory and
3083 * disk.
3084 */
3085void
3086ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
3087 TimestampTz abort_time)
3088{
3089 ReorderBufferTXN *txn;
3090
3091 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3092 false);
3093
3094 /* unknown, nothing to remove */
3095 if (txn == NULL)
3096 return;
3097
3098 txn->abort_time = abort_time;
3099
3100 /* For streamed transactions notify the remote node about the abort. */
3101 if (rbtxn_is_streamed(txn))
3102 {
3103 rb->stream_abort(rb, txn, lsn);
3104
3105 /*
3106 * We might have decoded changes for this transaction that could load
3107 * the cache as per the current transaction's view (consider DDLs that
3108 * happened in this transaction). We don't want the decoding of future
3109 * transactions to use those cache entries, so execute only the inval
3110 * messages in this transaction.
3111 */
3112 if (txn->ninvalidations > 0)
3114 txn->invalidations);
3115 }
3116
3117 /* cosmetic... */
3118 txn->final_lsn = lsn;
3119
3120 /* remove potential on-disk data, and deallocate */
3122}
3123
3124/*
3125 * Abort all transactions that aren't actually running anymore because the
3126 * server restarted.
3127 *
3128 * NB: These really have to be transactions that have aborted due to a server
3129 * crash/immediate restart, as we don't deal with invalidations here.
3130 */
3131void
3132ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
3133{
3135
3136 /*
3137 * Iterate through all (potential) toplevel TXNs and abort all that are
3138 * older than what possibly can be running. Once we've found the first one
3139 * that is still alive we stop; there might be some that acquired an xid
3140 * earlier but started writing later, but that's unlikely and they will be cleaned
3141 * up in a later call to this function.
3142 */
3143 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3144 {
3145 ReorderBufferTXN *txn;
3146
3147 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3148
3149 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3150 {
3151 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3152
3153 /* Notify the remote node about the crash/immediate restart. */
3154 if (rbtxn_is_streamed(txn))
3155 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3156
3157 /* remove potential on-disk data, and deallocate this tx */
3159 }
3160 else
3161 return;
3162 }
3163}
3164
3165/*
3166 * Forget the contents of a transaction if we aren't interested in its
3167 * contents. Needs to be first called for subtransactions and then for the
3168 * toplevel xid.
3169 *
3170 * This is significantly different from ReorderBufferAbort() because
3171 * transactions that have committed need to be treated differently from aborted
3172 * ones since they may have modified the catalog.
3173 *
3174 * Note that this is only allowed to be called at the moment a transaction
3175 * commit has just been read, not earlier; otherwise later records referring
3176 * to this xid might re-create the transaction incompletely.
3177 */
3178void
3179ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
3180{
3181 ReorderBufferTXN *txn;
3182
3183 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3184 false);
3185
3186 /* unknown, nothing to forget */
3187 if (txn == NULL)
3188 return;
3189
3190 /* this transaction mustn't be streamed */
3192
3193 /* cosmetic... */
3194 txn->final_lsn = lsn;
3195
3196 /*
3197 * Process only cache invalidation messages in this transaction if there
3198 * are any. Even if we're not interested in the transaction's contents, it
3199 * could have manipulated the catalog and we need to update the caches
3200 * according to that.
3201 */
3202 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3204 txn->invalidations);
3205 else
3206 Assert(txn->ninvalidations == 0);
3207
3208 /* remove potential on-disk data, and deallocate */
3210}
3211
3212/*
3213 * Invalidate cache for those transactions that need to be skipped just in case
3214 * catalogs were manipulated as part of the transaction.
3215 *
3216 * Note that this is a special-purpose function for prepared transactions where
3217 * we don't want to clean up the TXN even when we decide to skip it. See
3218 * DecodePrepare.
3219 */
3220void
3221ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
3222{
3223 ReorderBufferTXN *txn;
3224
3225 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3226 false);
3227
3228 /* unknown, nothing to do */
3229 if (txn == NULL)
3230 return;
3231
3232 /*
3233 * Process cache invalidation messages if there are any. Even if we're not
3234 * interested in the transaction's contents, it could have manipulated the
3235 * catalog and we need to update the caches according to that.
3236 */
3237 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3239 txn->invalidations);
3240 else
3241 Assert(txn->ninvalidations == 0);
3242}
3243
3244
3245/*
3246 * Execute invalidations happening outside the context of a decoded
3247 * transaction. That currently happens either for xid-less commits
3248 * (cf. RecordTransactionCommit()) or for invalidations in uninteresting
3249 * transactions (via ReorderBufferForget()).
3250 */
3251void
3252ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations,
3253 SharedInvalidationMessage *invalidations)
3254{
3258 int i;
3259
3260 if (use_subtxn)
3262
3263 /*
3264 * Force invalidations to happen outside of a valid transaction - that way
3265 * entries will just be marked as invalid without accessing the catalog.
3266 * That's advantageous because we don't need to setup the full state
3267 * necessary for catalog access.
3268 */
3269 if (use_subtxn)
3271
3272 for (i = 0; i < ninvalidations; i++)
3273 LocalExecuteInvalidationMessage(&invalidations[i]);
3274
3275 if (use_subtxn)
3276 {
3279 CurrentResourceOwner = cowner;
3280 }
3281}
3282
3283/*
3284 * Tell reorderbuffer about an xid seen in the WAL stream. Has to be called at
3285 * least once for every xid in XLogRecord->xl_xid (other places in records
3286 * may, but do not have to, be passed through here).
3287 *
3288 * Reorderbuffer keeps some data structures about transactions in LSN order,
3289 * for efficiency. To do that it has to know about when transactions are seen
3290 * first in the WAL. As many types of records are not actually interesting for
3291 * logical decoding, they do not necessarily pass through here.
3292 */
3293void
3294ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
3295{
3296 /* many records won't have an xid assigned, centralize check here */
3297 if (xid != InvalidTransactionId)
3298 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3299}
3300
3301/*
3302 * Add a new snapshot to this transaction that may only be used after lsn 'lsn'
3303 * because the previous snapshot doesn't describe the catalog correctly for
3304 * following rows.
3305 */
3306void
3307ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid,
3308 XLogRecPtr lsn, Snapshot snap)
3309{
3311
3312 change->data.snapshot = snap;
3314
3315 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3316}
3317
3318/*
3319 * Set up the transaction's base snapshot.
3320 *
3321 * If we know that xid is a subtransaction, set the base snapshot on the
3322 * top-level transaction instead.
3323 */
3324void
3325ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid,
3326 XLogRecPtr lsn, Snapshot snap)
3327{
3328 ReorderBufferTXN *txn;
3329 bool is_new;
3330
3331 Assert(snap != NULL);
3332
3333 /*
3334 * Fetch the transaction to operate on. If we know it's a subtransaction,
3335 * operate on its top-level transaction instead.
3336 */
3337 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3338 if (rbtxn_is_known_subxact(txn))
3339 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3340 NULL, InvalidXLogRecPtr, false);
3341 Assert(txn->base_snapshot == NULL);
3342
3343 txn->base_snapshot = snap;
3344 txn->base_snapshot_lsn = lsn;
3345 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3346
3348}
3349
3350/*
3351 * Access the catalog with this CommandId at this point in the changestream.
3352 *
3353 * May only be called for command ids > 1
3354 */
3355void
3356ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid,
3357 XLogRecPtr lsn, CommandId cid)
3358{
3360
3361 change->data.command_id = cid;
3363
3364 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3365}
3366
3367/*
3368 * Update memory counters to account for the new or removed change.
3369 *
3370 * We update two counters - in the reorder buffer, and in the transaction
3371 * containing the change. The reorder buffer counter allows us to quickly
3372 * decide if we reached the memory limit, while the transaction counter allows
3373 * us to quickly pick the largest transaction for eviction.
3374 *
3375 * At least one of txn or change must be non-NULL. We update the memory
3376 * counter of txn if it's non-NULL, otherwise change->txn.
3377 *
3378 * When streaming is enabled, we need to update the toplevel transaction
3379 * counters instead - we don't really care about subtransactions as we
3380 * can't stream them individually anyway, and we only pick toplevel
3381 * transactions for eviction. So only toplevel transactions matter.
3382 */
3383static void
3384ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb,
3385 ReorderBufferChange *change,
3386 ReorderBufferTXN *txn,
3387 bool addition, Size sz)
3388{
3389 ReorderBufferTXN *toptxn;
3390
3391 Assert(txn || change);
3392
3393 /*
3394 * Ignore tuple CID changes, because those are not evicted when reaching
3395 * memory limit. So we just don't count them, because it might easily
3396 * trigger a pointless attempt to spill.
3397 */
3398 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3399 return;
3400
3401 if (sz == 0)
3402 return;
3403
3404 if (txn == NULL)
3405 txn = change->txn;
3406 Assert(txn != NULL);
3407
3408 /*
3409 * Update the total size in top level as well. This is later used to
3410 * compute the decoding stats.
3411 */
3412 toptxn = rbtxn_get_toptxn(txn);
3413
3414 if (addition)
3415 {
3416 Size oldsize = txn->size;
3417
3418 txn->size += sz;
3419 rb->size += sz;
3420
3421 /* Update the total size in the top transaction. */
3422 toptxn->total_size += sz;
3423
3424 /* Update the max-heap */
3425 if (oldsize != 0)
3426 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3427 pairingheap_add(rb->txn_heap, &txn->txn_node);
3428 }
3429 else
3430 {
3431 Assert((rb->size >= sz) && (txn->size >= sz));
3432 txn->size -= sz;
3433 rb->size -= sz;
3434
3435 /* Update the total size in the top transaction. */
3436 toptxn->total_size -= sz;
3437
3438 /* Update the max-heap */
3439 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3440 if (txn->size != 0)
3441 pairingheap_add(rb->txn_heap, &txn->txn_node);
3442 }
3443
3444 Assert(txn->size <= rb->size);
3445}
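/*
 * Editor-added illustrative sketch: how a caller is expected to keep this
 * accounting in sync when queueing and later releasing a change.  The helper
 * below is hypothetical and simplified; it only demonstrates the intended
 * calling pattern, using ReorderBufferChangeSize() defined later in this file.
 */
static void
SketchAccountChange(ReorderBuffer *rb, ReorderBufferChange *change)
{
	/* adding: pass the change and let the function locate change->txn */
	ReorderBufferChangeMemoryUpdate(rb, change, NULL, true,
									ReorderBufferChangeSize(change));

	/* ... and when the change is eventually released ... */
	ReorderBufferChangeMemoryUpdate(rb, change, NULL, false,
									ReorderBufferChangeSize(change));
}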
3446
3447/*
3448 * Add new (relfilelocator, tid) -> (cmin, cmax) mappings.
3449 *
3450 * We do not include this change type in memory accounting, because we
3451 * keep CIDs in a separate list and do not evict them when reaching
3452 * the memory limit.
3453 */
3454void
3455ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
3456 XLogRecPtr lsn, RelFileLocator locator,
3457 ItemPointerData tid, CommandId cmin,
3458 CommandId cmax, CommandId combocid)
3459{
3461 ReorderBufferTXN *txn;
3462
3463 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3464
3465 change->data.tuplecid.locator = locator;
3466 change->data.tuplecid.tid = tid;
3467 change->data.tuplecid.cmin = cmin;
3468 change->data.tuplecid.cmax = cmax;
3469 change->data.tuplecid.combocid = combocid;
3470 change->lsn = lsn;
3471 change->txn = txn;
3473
3474 dlist_push_tail(&txn->tuplecids, &change->node);
3475 txn->ntuplecids++;
3476}
3477
3478/*
3479 * Add new invalidation messages to the reorder buffer queue.
3480 */
3481static void
3482ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid,
3483 XLogRecPtr lsn, Size nmsgs,
3484 SharedInvalidationMessage *msgs)
3485{
3486 ReorderBufferChange *change;
3487
3488 change = ReorderBufferAllocChange(rb);
3490 change->data.inval.ninvalidations = nmsgs;
3492 memcpy(change->data.inval.invalidations, msgs,
3493 sizeof(SharedInvalidationMessage) * nmsgs);
3494
3495 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3496}
3497
3498/*
3499 * A helper function for ReorderBufferAddInvalidations() and
3500 * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
3501 * messages into the array pointed to by *invals_out.
3502 */
3503static void
3508{
3509 if (*ninvals_out == 0)
3510 {
3514 }
3515 else
3516 {
3517 /* Enlarge the array of inval messages */
3520 (*ninvals_out + nmsgs_new));
3524 }
3525}
3526
3527/*
3528 * Accumulate the invalidations for executing them later.
3529 *
3530 * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
3531 * accumulates all the invalidation messages in the toplevel transaction, if
3532 * available, otherwise in the current transaction, as well as in the form of
3533 * a change in the reorder buffer. We need to record it as a change so that
3534 * we can execute only the required invalidations instead of executing
3535 * all the invalidations on each CommandId increment. We also need to
3536 * accumulate these in the txn buffer because in some cases where we skip
3537 * processing the transaction (see ReorderBufferForget), we need to execute
3538 * all the invalidations together.
3539 */
3540void
3541ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
3542 XLogRecPtr lsn, Size nmsgs,
3543 SharedInvalidationMessage *msgs)
3544{
3545 ReorderBufferTXN *txn;
3546 MemoryContext oldcontext;
3547
3548 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3549
3550 oldcontext = MemoryContextSwitchTo(rb->context);
3551
3552 /*
3553 * Collect all the invalidations under the top transaction, if available,
3554 * so that we can execute them all together. See comments atop this
3555 * function.
3556 */
3557 txn = rbtxn_get_toptxn(txn);
3558
3559 Assert(nmsgs > 0);
3560
3562 &txn->ninvalidations,
3563 msgs, nmsgs);
3564
3565 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3566
3567 MemoryContextSwitchTo(oldcontext);
3568}
3569
3570/*
3571 * Accumulate the invalidations distributed by other committed transactions
3572 * for executing them later.
3573 *
3574 * This function is similar to ReorderBufferAddInvalidations() but stores
3575 * the given inval messages to the txn->invalidations_distributed with the
3576 * overflow check.
3577 *
3578 * This needs to be called by committed transactions to distribute their
3579 * inval messages to in-progress transactions.
3580 */
3581void
3582ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid,
3583 XLogRecPtr lsn, Size nmsgs,
3584 SharedInvalidationMessage *msgs)
3585{
3586 ReorderBufferTXN *txn;
3587 MemoryContext oldcontext;
3588
3589 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3590
3591 oldcontext = MemoryContextSwitchTo(rb->context);
3592
3593 /*
3594 * Collect all the invalidations under the top transaction, if available,
3595 * so that we can execute them all together. See comments
3596 * ReorderBufferAddInvalidations.
3597 */
3598 txn = rbtxn_get_toptxn(txn);
3599
3600 Assert(nmsgs > 0);
3601
3603 {
3604 /*
3605 * Check the transaction has enough space for storing distributed
3606 * invalidation messages.
3607 */
3609 {
3610 /*
3611 * Mark the invalidation message as overflowed and free up the
3612 * messages accumulated so far.
3613 */
3615
3617 {
3621 }
3622 }
3623 else
3626 msgs, nmsgs);
3627 }
3628
3629 /* Queue the invalidation messages into the transaction */
3630 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3631
3632 MemoryContextSwitchTo(oldcontext);
3633}
3634
3635/*
3636 * Apply all invalidations we know. Possibly we only need parts at this point
3637 * in the changestream but we don't know which those are.
3638 */
3639static void
3640ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
3641{
3642 int i;
3643
3644 for (i = 0; i < nmsgs; i++)
3645 LocalExecuteInvalidationMessage(&msgs[i]);
3646}
3647
3648/*
3649 * Mark a transaction as containing catalog changes
3650 */
3651void
3652ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid,
3653 XLogRecPtr lsn)
3654{
3655 ReorderBufferTXN *txn;
3656
3657 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3658
3659 if (!rbtxn_has_catalog_changes(txn))
3660 {
3662 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3663 }
3664
3665 /*
3666 * Mark top-level transaction as having catalog changes too if one of its
3667 * children has them, so that ReorderBufferBuildTupleCidHash can
3668 * conveniently check just the top-level transaction and decide whether to
3669 * build the hash table or not.
3670 */
3671 if (rbtxn_is_subtxn(txn))
3672 {
3673 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3674
3675 if (!rbtxn_has_catalog_changes(toptxn))
3676 {
3678 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3679 }
3680 }
3681}
3682
3683/*
3684 * Return palloc'ed array of the transactions that have changed catalogs.
3685 * The returned array is sorted in xidComparator order.
3686 *
3687 * The caller must free the returned array when done with it.
3688 */
3689TransactionId *
3690ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb)
3691{
3692 dlist_iter iter;
3693 TransactionId *xids = NULL;
3694 size_t xcnt = 0;
3695
3696 /* Quick return if the list is empty */
3697 if (dclist_count(&rb->catchange_txns) == 0)
3698 return NULL;
3699
3700 /* Initialize XID array */
3701 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3702 dclist_foreach(iter, &rb->catchange_txns)
3703 {
3705 catchange_node,
3706 iter.cur);
3707
3709
3710 xids[xcnt++] = txn->xid;
3711 }
3712
3713 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3714
3715 Assert(xcnt == dclist_count(&rb->catchange_txns));
3716 return xids;
3717}
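/*
 * Editor-added illustrative sketch: because the array returned above is
 * sorted in xidComparator order, a caller can probe it with bsearch().  The
 * wrapper below is hypothetical and assumes the function above is
 * ReorderBufferGetCatalogChangesXacts(), as in PostgreSQL master.
 */
static bool
SketchXidChangedCatalogs(ReorderBuffer *rb, TransactionId xid)
{
	TransactionId *xids = ReorderBufferGetCatalogChangesXacts(rb);
	size_t		nxids = dclist_count(&rb->catchange_txns);
	bool		found;

	if (xids == NULL)
		return false;

	found = bsearch(&xid, xids, nxids, sizeof(TransactionId),
					xidComparator) != NULL;
	pfree(xids);
	return found;
}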
3718
3719/*
3720 * Query whether a transaction is already *known* to contain catalog
3721 * changes. This can be wrong until directly before the commit!
3722 */
3723bool
3724ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
3725{
3726 ReorderBufferTXN *txn;
3727
3728 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3729 false);
3730 if (txn == NULL)
3731 return false;
3732
3733 return rbtxn_has_catalog_changes(txn);
3734}
3735
3736/*
3737 * ReorderBufferXidHasBaseSnapshot
3738 * Have we already set the base snapshot for the given txn/subtxn?
3739 */
3740bool
3741ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
3742{
3743 ReorderBufferTXN *txn;
3744
3745 txn = ReorderBufferTXNByXid(rb, xid, false,
3746 NULL, InvalidXLogRecPtr, false);
3747
3748 /* transaction isn't known yet, ergo no snapshot */
3749 if (txn == NULL)
3750 return false;
3751
3752 /* a known subtxn? operate on top-level txn instead */
3753 if (rbtxn_is_known_subxact(txn))
3754 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3755 NULL, InvalidXLogRecPtr, false);
3756
3757 return txn->base_snapshot != NULL;
3758}
3759
3760
3761/*
3762 * ---------------------------------------
3763 * Disk serialization support
3764 * ---------------------------------------
3765 */
3766
3767/*
3768 * Ensure the IO buffer is >= sz.
3769 */
3770static void
3771ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
3772{
3773 if (!rb->outbufsize)
3774 {
3775 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3776 rb->outbufsize = sz;
3777 }
3778 else if (rb->outbufsize < sz)
3779 {
3780 rb->outbuf = repalloc(rb->outbuf, sz);
3781 rb->outbufsize = sz;
3782 }
3783}
3784
3785
3786/* Compare two transactions by size */
3787static int
3789{
3792
3793 if (ta->size < tb->size)
3794 return -1;
3795 if (ta->size > tb->size)
3796 return 1;
3797 return 0;
3798}
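/*
 * Editor-added note: the comparator above (ReorderBufferTXNSizeCompare in
 * PostgreSQL master; its name is not visible in this listing) is what makes
 * the transaction pairing heap a max-heap keyed on txn->size, so that
 * pairingheap_first() yields the largest transaction.  A sketch of how
 * rb->txn_heap is expected to be set up; treat it as illustrative rather
 * than the exact constructor code:
 */
static void
SketchSetupTxnHeap(ReorderBuffer *rb)
{
	rb->txn_heap = pairingheap_allocate(ReorderBufferTXNSizeCompare, NULL);
}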
3799
3800/*
3801 * Find the largest transaction (toplevel or subxact) to evict (spill to disk).
3802 */
3803static ReorderBufferTXN *
3804ReorderBufferLargestTXN(ReorderBuffer *rb)
3805{
3807
3808 /* Get the largest transaction from the max-heap */
3810 pairingheap_first(rb->txn_heap));
3811
3812 Assert(largest);
3813 Assert(largest->size > 0);
3814 Assert(largest->size <= rb->size);
3815
3816 return largest;
3817}
3818
3819/*
3820 * Find the largest streamable (and non-aborted) toplevel transaction to evict
3821 * (by streaming).
3822 *
3823 * This can be seen as an optimized version of ReorderBufferLargestTXN, which
3824 * should give us the same transaction (because we don't update the memory
3825 * accounting for subtransactions when streaming, so it's always 0). But we can
3826 * simply iterate over the limited number of toplevel transactions that have a
3827 * base snapshot. There is no point in selecting a transaction that doesn't have
3828 * a base snapshot because we don't decode such transactions. Also, we do not
3829 * select a transaction that doesn't have any streamable change.
3830 *
3831 * Note that we skip transactions that contain incomplete changes. There
3832 * is scope for optimization here, in that we could select the largest
3833 * transaction that has incomplete changes. But that would make the code and
3834 * design quite complex, and might not be worth the benefit. If we plan to
3835 * stream the transactions that contain incomplete changes then we need to
3836 * find a way to partially stream/truncate the transaction changes in-memory
3837 * and build a mechanism to partially truncate the spilled files.
3838 * Additionally, whenever we partially stream the transaction we need to
3839 * maintain the last streamed lsn and next time we need to restore from that
3840 * segment and the offset in WAL. As we stream the changes from the top
3841 * transaction and restore them subtransaction wise, we need to even remember
3842 * the subxact from where we streamed the last change.
3843 */
3844static ReorderBufferTXN *
3845ReorderBufferLargestStreamableTopTXN(ReorderBuffer *rb)
3846{
3847 dlist_iter iter;
3848 Size largest_size = 0;
3850
3851 /* Find the largest top-level transaction having a base snapshot. */
3852 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3853 {
3854 ReorderBufferTXN *txn;
3855
3856 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3857
3858 /* must not be a subtxn */
3860 /* base_snapshot must be set */
3861 Assert(txn->base_snapshot != NULL);
3862
3863 /* Don't consider these kinds of transactions for eviction. */
3864 if (rbtxn_has_partial_change(txn) ||
3866 rbtxn_is_aborted(txn))
3867 continue;
3868
3869 /* Find the largest of the eviction candidates. */
3870 if ((largest == NULL || txn->total_size > largest_size) &&
3871 (txn->total_size > 0))
3872 {
3873 largest = txn;
3874 largest_size = txn->total_size;
3875 }
3876 }
3877
3878 return largest;
3879}
3880
3881/*
3882 * Check whether the logical_decoding_work_mem limit was reached, and if so,
3883 * pick the largest (sub)transaction at a time, evicting it by spilling its
3884 * changes to disk or sending them to the output plugin, until we are under the memory limit.
3885 *
3886 * If debug_logical_replication_streaming is set to "immediate", stream or
3887 * serialize the changes immediately.
3888 *
3889 * XXX At this point we select transactions until we are under the memory
3890 * limit, but we might also adopt a more elaborate eviction strategy - for example
3891 * evicting enough transactions to free a certain fraction (e.g. 50%) of the memory
3892 * limit.
3893 */
3894static void
3895ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
3896{
3897 ReorderBufferTXN *txn;
3898 bool update_stats = true;
3899
3900 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3901 {
3902 /*
3903 * Update the statistics as the memory usage has reached the limit. We
3904 * report the statistics update later in this function since we can
3905 * update the slot statistics altogether while streaming or
3906 * serializing transactions in most cases.
3907 */
3908 rb->memExceededCount += 1;
3909 }
3911 {
3912 /*
3913 * Bail out if debug_logical_replication_streaming is buffered and we
3914 * haven't exceeded the memory limit.
3915 */
3916 return;
3917 }
3918
3919 /*
3920 * If debug_logical_replication_streaming is immediate, loop until there's
3921 * no change. Otherwise, loop until we are under the memory limit. One
3922 * might think that just by evicting the largest (sub)transaction we will
3923 * come under the memory limit, on the assumption that the selected
3924 * transaction is at least as large as the most recent change (which
3925 * caused us to go over the memory limit). However, that is not true,
3926 * because a user can reduce logical_decoding_work_mem to a smaller
3927 * value before the most recent change.
3928 */
3929 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3931 rb->size > 0))
3932 {
3933 /*
3934 * Pick the largest non-aborted transaction and evict it from memory
3935 * by streaming, if possible. Otherwise, spill to disk.
3936 */
3939 {
3940 /* we know there has to be one, because the size is not zero */
3941 Assert(txn && rbtxn_is_toptxn(txn));
3942 Assert(txn->total_size > 0);
3943 Assert(rb->size >= txn->total_size);
3944
3945 /* skip the transaction if aborted */
3947 continue;
3948
3950 }
3951 else
3952 {
3953 /*
3954 * Pick the largest transaction (or subtransaction) and evict it
3955 * from memory by serializing it to disk.
3956 */
3958
3959 /* we know there has to be one, because the size is not zero */
3960 Assert(txn);
3961 Assert(txn->size > 0);
3962 Assert(rb->size >= txn->size);
3963
3964 /* skip the transaction if aborted */
3966 continue;
3967
3969 }
3970
3971 /*
3972 * After eviction, the transaction should have no entries in memory,
3973 * and should use 0 bytes for changes.
3974 */
3975 Assert(txn->size == 0);
3976 Assert(txn->nentries_mem == 0);
3977
3978 /*
3979 * We've reported the memExceededCount update while streaming or
3980 * serializing the transaction.
3981 */
3982 update_stats = false;
3983 }
3984
3985 if (update_stats)
3987
3988 /* We must be under the memory limit now. */
3989 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3990}
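/*
 * Editor-added worked example: logical_decoding_work_mem is a GUC expressed
 * in kilobytes, so the byte threshold used above is the GUC value times 1024.
 * With the default of 65536 kB, eviction starts once rb->size reaches
 * 65536 * 1024 = 67108864 bytes (64 MiB).  A hypothetical helper making the
 * conversion explicit:
 */
static inline Size
SketchWorkMemBytes(void)
{
	return (Size) logical_decoding_work_mem * 1024;
}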
3991
3992/*
3993 * Spill data of a large transaction (and its subtransactions) to disk.
3994 */
3995static void
3996ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
3997{
4000 int fd = -1;
4002 Size spilled = 0;
4003 Size size = txn->size;
4004
4005 elog(DEBUG2, "spill %u changes in XID %u to disk",
4006 (uint32) txn->nentries_mem, txn->xid);
4007
4008 /* do the same to all child TXs */
4010 {
4012
4015 }
4016
4017 /* serialize changestream */
4019 {
4020 ReorderBufferChange *change;
4021
4022 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4023
4024 /*
4025 * Store the change in the segment to which it belongs by start LSN;
4026 * don't split it over multiple segments, though.
4027 */
4028 if (fd == -1 ||
4030 {
4031 char path[MAXPGPATH];
4032
4033 if (fd != -1)
4035
4037
4038 /*
4039 * No need to care about TLIs here, only used during a single run,
4040 * so each LSN only maps to a specific WAL record.
4041 */
4043 curOpenSegNo);
4044
4045 /* open segment, create it if necessary */
4046 fd = OpenTransientFile(path,
4048
4049 if (fd < 0)
4050 ereport(ERROR,
4052 errmsg("could not open file \"%s\": %m", path)));
4053 }
4054
4055 ReorderBufferSerializeChange(rb, txn, fd, change);
4056 dlist_delete(&change->node);
4057 ReorderBufferFreeChange(rb, change, false);
4058
4059 spilled++;
4060 }
4061
4062 /* Update the memory counter */
4063 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4064
4065 /* update the statistics iff we have spilled anything */
4066 if (spilled)
4067 {
4068 rb->spillCount += 1;
4069 rb->spillBytes += size;
4070
4071 /* don't consider already serialized transactions */
4072 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4073
4074 /* update the decoding stats */
4076 }
4077
4078 Assert(spilled == txn->nentries_mem);
4080 txn->nentries_mem = 0;
4082
4083 if (fd != -1)
4085}
4086
4087/*
4088 * Serialize individual change to disk.
4089 */
4090static void
4091ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
4092 int fd, ReorderBufferChange *change)
4093{
4096
4098
4099 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4100 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4101
4102 switch (change->action)
4103 {
4104 /* fall through these, they're all similar enough */
4109 {
4110 char *data;
4112 newtup;
4113 Size oldlen = 0;
4114 Size newlen = 0;
4115
4116 oldtup = change->data.tp.oldtuple;
4117 newtup = change->data.tp.newtuple;
4118
4119 if (oldtup)
4120 {
4121 sz += sizeof(HeapTupleData);
4122 oldlen = oldtup->t_len;
4123 sz += oldlen;
4124 }
4125
4126 if (newtup)
4127 {
4128 sz += sizeof(HeapTupleData);
4129 newlen = newtup->t_len;
4130 sz += newlen;
4131 }
4132
4133 /* make sure we have enough space */
4135
4136 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4137 /* might have been reallocated above */
4138 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4139
4140 if (oldlen)
4141 {
4142 memcpy(data, oldtup, sizeof(HeapTupleData));
4143 data += sizeof(HeapTupleData);
4144
4145 memcpy(data, oldtup->t_data, oldlen);
4146 data += oldlen;
4147 }
4148
4149 if (newlen)
4150 {
4151 memcpy(data, newtup, sizeof(HeapTupleData));
4152 data += sizeof(HeapTupleData);
4153
4154 memcpy(data, newtup->t_data, newlen);
4155 data += newlen;
4156 }
4157 break;
4158 }
4160 {
4161 char *data;
4162 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4163
4164 sz += prefix_size + change->data.msg.message_size +
4165 sizeof(Size) + sizeof(Size);
4167
4168 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4169
4170 /* might have been reallocated above */
4171 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4172
4173 /* write the prefix including the size */
4174 memcpy(data, &prefix_size, sizeof(Size));
4175 data += sizeof(Size);
4176 memcpy(data, change->data.msg.prefix,
4177 prefix_size);
4178 data += prefix_size;
4179
4180 /* write the message including the size */
4181 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4182 data += sizeof(Size);
4183 memcpy(data, change->data.msg.message,
4184 change->data.msg.message_size);
4185 data += change->data.msg.message_size;
4186
4187 break;
4188 }
4190 {
4191 char *data;
4193 change->data.inval.ninvalidations;
4194
4195 sz += inval_size;
4196
4198 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4199
4200 /* might have been reallocated above */
4201 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4203 data += inval_size;
4204
4205 break;
4206 }
4208 {
4209 Snapshot snap;
4210 char *data;
4211
4212 snap = change->data.snapshot;
4213
4214 sz += sizeof(SnapshotData) +
4215 sizeof(TransactionId) * snap->xcnt +
4216 sizeof(TransactionId) * snap->subxcnt;
4217
4218 /* make sure we have enough space */
4220 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4221 /* might have been reallocated above */
4222 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4223
4224 memcpy(data, snap, sizeof(SnapshotData));
4225 data += sizeof(SnapshotData);
4226
4227 if (snap->xcnt)
4228 {
4229 memcpy(data, snap->xip,
4230 sizeof(TransactionId) * snap->xcnt);
4231 data += sizeof(TransactionId) * snap->xcnt;
4232 }
4233
4234 if (snap->subxcnt)
4235 {
4236 memcpy(data, snap->subxip,
4237 sizeof(TransactionId) * snap->subxcnt);
4238 data += sizeof(TransactionId) * snap->subxcnt;
4239 }
4240 break;
4241 }
4243 {
4244 Size size;
4245 char *data;
4246
4247 /* account for the OIDs of truncated relations */
4248 size = sizeof(Oid) * change->data.truncate.nrelids;
4249 sz += size;
4250
4251 /* make sure we have enough space */
4253
4254 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4255 /* might have been reallocated above */
4256 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4257
4258 memcpy(data, change->data.truncate.relids, size);
4259 data += size;
4260
4261 break;
4262 }
4267 /* ReorderBufferChange contains everything important */
4268 break;
4269 }
4270
4271 ondisk->size = sz;
4272
4273 errno = 0;
4275 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4276 {
4277 int save_errno = errno;
4278
4280
4281 /* if write didn't set errno, assume problem is no disk space */
4283 ereport(ERROR,
4285 errmsg("could not write to data file for XID %u: %m",
4286 txn->xid)));
4287 }
4289
4290 /*
4291 * Keep the transaction's final_lsn up to date with each change we send to
4292 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4293 * only do this on commit and abort records, but that doesn't work if a
4294 * system crash leaves a transaction without its abort record).
4295 *
4296 * Make sure not to move it backwards.
4297 */
4298 if (txn->final_lsn < change->lsn)
4299 txn->final_lsn = change->lsn;
4300
4301 Assert(ondisk->change.action == change->action);
4302}
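/*
 * Editor-added summary of the on-disk record layout produced above for a
 * tuple change (derived from the code; shown as an illustrative sketch):
 *
 *   ReorderBufferDiskChange (ondisk->size covers the whole record)
 *     [HeapTupleData][old tuple data]   only if change->data.tp.oldtuple is set
 *     [HeapTupleData][new tuple data]   only if change->data.tp.newtuple is set
 *
 * So an UPDATE carrying both tuple versions occupies:
 *
 *   sizeof(ReorderBufferDiskChange)
 *     + sizeof(HeapTupleData) + oldtup->t_len
 *     + sizeof(HeapTupleData) + newtup->t_len
 */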
4303
4304/* Returns true if the output plugin supports streaming, false otherwise. */
4305static inline bool
4306ReorderBufferCanStream(ReorderBuffer *rb)
4307{
4308 LogicalDecodingContext *ctx = rb->private_data;
4309
4310 return ctx->streaming;
4311}
4312
4313/* Returns true if streaming can be started now, false otherwise. */
4314static inline bool
4315ReorderBufferCanStartStreaming(ReorderBuffer *rb)
4316{
4317 LogicalDecodingContext *ctx = rb->private_data;
4318 SnapBuild *builder = ctx->snapshot_builder;
4319
4320 /* We can't start streaming unless a consistent state is reached. */
4322 return false;
4323
4324 /*
4325 * We can't start streaming immediately even if streaming is enabled,
4326 * because we may have previously decoded this transaction and are now just
4327 * restarting.
4328 */
4330 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4331 return true;
4332
4333 return false;
4334}
4335
4336/*
4337 * Send data of a large transaction (and its subtransactions) to the
4338 * output plugin, but using the stream API.
4339 */
4340static void
4341ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
4342{
4343 Snapshot snapshot_now;
4344 CommandId command_id;
4345 Size stream_bytes;
4346 bool txn_is_streamed;
4347
4348 /* We can never reach here for a subtransaction. */
4349 Assert(rbtxn_is_toptxn(txn));
4350
4351 /*
4352 * We can't make any assumptions about base snapshot here, similar to what
4353 * ReorderBufferCommit() does. That relies on base_snapshot getting
4354 * transferred from subxact in ReorderBufferCommitChild(), but that was
4355 * not yet called as the transaction is in-progress.
4356 *
4357 * So just walk the subxacts and use the same logic here. But we only need
4358 * to do that once, when the transaction is streamed for the first time.
4359 * After that we need to reuse the snapshot from the previous run.
4360 *
4361 * Unlike DecodeCommit which adds xids of all the subtransactions in
4362 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4363 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4364 * allows the catalog changes made in subtransactions decoded till now to
4365 * be visible.
4366 */
4367 if (txn->snapshot_now == NULL)
4368 {
4369 dlist_iter subxact_i;
4370
4371 /* make sure this transaction is streamed for the first time */
4372 Assert(!rbtxn_is_streamed(txn));
4373
4374 /* at the beginning we should have invalid command ID */
4375 Assert(txn->command_id == InvalidCommandId);
4376
4377 dlist_foreach(subxact_i, &txn->subtxns)
4378 {
4379 ReorderBufferTXN *subtxn;
4380
4381 subtxn = dlist_container(ReorderBufferTXN, node, subxact_i.cur);
4382 ReorderBufferTransferSnapToParent(txn, subtxn);
4383 }
4384
4385 /*
4386 * If this transaction has no snapshot, it didn't make any changes to
4387 * the database till now, so there's nothing to decode.
4388 */
4389 if (txn->base_snapshot == NULL)
4390 {
4391 Assert(txn->ninvalidations == 0);
4392 return;
4393 }
4394
4395 command_id = FirstCommandId;
4396 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4397 txn, command_id);
4398 }
4399 else
4400 {
4401 /* the transaction must have been already streamed */
4402 Assert(rbtxn_is_streamed(txn));
4403
4404 /*
4405 * Nah, we already have snapshot from the previous streaming run. We
4406 * assume new subxacts can't move the LSN backwards, and so can't beat
4407 * the LSN condition in the previous branch (so no need to walk
4408 * through subxacts again). In fact, we must not do that as we may be
4409 * using snapshot half-way through the subxact.
4410 */
4411 command_id = txn->command_id;
4412
4413 /*
4414 * We can't use txn->snapshot_now directly because after the last
4415 * streaming run, we might have got some new sub-transactions. So we
4416 * need to add them to the snapshot.
4417 */
4418 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4419 txn, command_id);
4420
4421 /* Free the previously copied snapshot. */
4422 Assert(txn->snapshot_now->copied);
4423 ReorderBufferFreeSnap(rb, txn->snapshot_now);
4424 txn->snapshot_now = NULL;
4425 }
4426
4427 /*
4428 * Remember this information to be used later to update stats. We can't
4429 * update the stats here as an error while processing the changes would
4430 * lead to the accumulation of stats even though we haven't streamed all
4431 * the changes.
4432 */
4433 txn_is_streamed = rbtxn_is_streamed(txn);
4434 stream_bytes = txn->total_size;
4435
4436 /* Process and send the changes to output plugin. */
4437 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4438 command_id, true);
4439
4440 rb->streamCount += 1;
4441 rb->streamBytes += stream_bytes;
4442
4443 /* Don't consider already streamed transaction. */
4444 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4445
4446 /* update the decoding stats */
4447 UpdateDecodingStats((LogicalDecodingContext *) rb->private_data);
4448
4449 Assert(dlist_is_empty(&txn->changes));
4450 Assert(txn->nentries == 0);
4451 Assert(txn->nentries_mem == 0);
4452}
4453
4454/*
4455 * Size of a change in memory.
4456 */
4457static Size
4458ReorderBufferChangeSize(ReorderBufferChange *change)
4459{
4460 Size sz = sizeof(ReorderBufferChange);
4461
4462 switch (change->action)
4463 {
4464 /* fall through these, they're all similar enough */
4465 case REORDER_BUFFER_CHANGE_INSERT:
4466 case REORDER_BUFFER_CHANGE_UPDATE:
4467 case REORDER_BUFFER_CHANGE_DELETE:
4468 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
4469 {
4470 HeapTuple oldtup,
4471 newtup;
4472 Size oldlen = 0;
4473 Size newlen = 0;
4474
4475 oldtup = change->data.tp.oldtuple;
4476 newtup = change->data.tp.newtuple;
4477
4478 if (oldtup)
4479 {
4480 sz += sizeof(HeapTupleData);
4481 oldlen = oldtup->t_len;
4482 sz += oldlen;
4483 }
4484
4485 if (newtup)
4486 {
4487 sz += sizeof(HeapTupleData);
4488 newlen = newtup->t_len;
4489 sz += newlen;
4490 }
4491
4492 break;
4493 }
4494 case REORDER_BUFFER_CHANGE_MESSAGE:
4495 {
4496 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4497
4498 sz += prefix_size + change->data.msg.message_size +
4499 sizeof(Size) + sizeof(Size);
4500
4501 break;
4502 }
4503 case REORDER_BUFFER_CHANGE_INVALIDATION:
4504 {
4505 sz += sizeof(SharedInvalidationMessage) *
4506 change->data.inval.ninvalidations;
4507 break;
4508 }
4509 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
4510 {
4511 Snapshot snap;
4512
4513 snap = change->data.snapshot;
4514
4515 sz += sizeof(SnapshotData) +
4516 sizeof(TransactionId) * snap->xcnt +
4517 sizeof(TransactionId) * snap->subxcnt;
4518
4519 break;
4520 }
4521 case REORDER_BUFFER_CHANGE_TRUNCATE:
4522 {
4523 sz += sizeof(Oid) * change->data.truncate.nrelids;
4524
4525 break;
4526 }
4527 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
4528 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
4529 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
4530 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
4531 /* ReorderBufferChange contains everything important */
4532 break;
4533 }
4534
4535 return sz;
4536}
4537
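The per-change size computed above is what the buffer-wide accounting adds up; a minimal sketch of how that total is typically compared against logical_decoding_work_mem (a GUC in kilobytes) follows. The helper name is hypothetical; the authoritative check lives in ReorderBufferCheckMemoryLimit.

/* Hypothetical helper, for illustration only. */
static bool
reorder_buffer_over_limit(ReorderBuffer *rb)
{
	/* rb->size accumulates ReorderBufferChangeSize() of all queued changes */
	return rb->size >= logical_decoding_work_mem * (Size) 1024;
}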
4538
4539/*
4540 * Restore a number of changes spilled to disk back into memory.
4541 */
4542static Size
4543ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
4544 TXNEntryFile *file, XLogSegNo *segno)
4545{
4546 Size restored = 0;
4549 File *fd = &file->vfd;
4550
4553
4554 /* free current entries, so we have memory for more */
4556 {
4559
4560 dlist_delete(&cleanup->node);
4562 }
4563 txn->nentries_mem = 0;
4565
4567
4568 while (restored < max_changes_in_memory && *segno <= last_segno)
4569 {
4570 int readBytes;
4572
4574
4575 if (*fd == -1)
4576 {
4577 char path[MAXPGPATH];
4578
4579 /* first time in */
4580 if (*segno == 0)
4581 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4582
4583 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4584
4585 /*
4586 * No need to care about TLIs here, only used during a single run,
4587 * so each LSN only maps to a specific WAL record.
4588 */
4590 *segno);
4591
4593
4594 /* No harm in resetting the offset even in case of failure */
4595 file->curOffset = 0;
4596
4597 if (*fd < 0 && errno == ENOENT)
4598 {
4599 *fd = -1;
4600 (*segno)++;
4601 continue;
4602 }
4603 else if (*fd < 0)
4604 ereport(ERROR,
4606 errmsg("could not open file \"%s\": %m",
4607 path)));
4608 }
4609
4610 /*
4611 * Read the statically sized part of a change which has information
4612 * about the total size. If we couldn't read a record, we're at the
4613 * end of this file.
4614 */
4616 readBytes = FileRead(file->vfd, rb->outbuf,
4619
4620 /* eof */
4621 if (readBytes == 0)
4622 {
4623 FileClose(*fd);
4624 *fd = -1;
4625 (*segno)++;
4626 continue;
4627 }
4628 else if (readBytes < 0)
4629 ereport(ERROR,
4631 errmsg("could not read from reorderbuffer spill file: %m")));
4632 else if (readBytes != sizeof(ReorderBufferDiskChange))
4633 ereport(ERROR,
4635 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4636 readBytes,
4637 (uint32) sizeof(ReorderBufferDiskChange))));
4638
4639 file->curOffset += readBytes;
4640
4641 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4642
4644 sizeof(ReorderBufferDiskChange) + ondisk->size);
4645 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4646
4647 readBytes = FileRead(file->vfd,
4648 rb->outbuf + sizeof(ReorderBufferDiskChange),
4649 ondisk->size - sizeof(ReorderBufferDiskChange),
4650 file->curOffset,
4652
4653 if (readBytes < 0)
4654 ereport(ERROR,
4656 errmsg("could not read from reorderbuffer spill file: %m")));
4657 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4658 ereport(ERROR,
4660 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4661 readBytes,
4662 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4663
4664 file->curOffset += readBytes;
4665
4666 /*
4667 * ok, read a full change from disk, now restore it into proper
4668 * in-memory format
4669 */
4670 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4671 restored++;
4672 }
4673
4674 return restored;
4675}
4676
4677/*
4678 * Convert change from its on-disk format to in-memory format and queue it onto
4679 * the TXN's ->changes list.
4680 *
4681 * Note: although "data" is declared char*, at entry it points to a
4682 * maxalign'd buffer, making it safe in most of this function to assume
4683 * that the pointed-to data is suitably aligned for direct access.
4684 */
4685static void
4686ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
4687 char *data)
4688{
4689 ReorderBufferDiskChange *ondisk;
4690 ReorderBufferChange *change;
4691
4692 ondisk = (ReorderBufferDiskChange *) data;
4693
4694 change = ReorderBufferAllocChange(rb);
4695
4696 /* copy static part */
4697 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4698
4699 data += sizeof(ReorderBufferDiskChange);
4700
4701 /* restore individual stuff */
4702 switch (change->action)
4703 {
4704 /* fall through these, they're all similar enough */
4705 case REORDER_BUFFER_CHANGE_INSERT:
4706 case REORDER_BUFFER_CHANGE_UPDATE:
4707 case REORDER_BUFFER_CHANGE_DELETE:
4708 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
4709 if (change->data.tp.oldtuple)
4710 {
4711 uint32 tuplelen = ((HeapTuple) data)->t_len;
4712
4713 change->data.tp.oldtuple =
4715
4716 /* restore ->tuple */
4717 memcpy(change->data.tp.oldtuple, data,
4718 sizeof(HeapTupleData));
4719 data += sizeof(HeapTupleData);
4720
4721 /* reset t_data pointer into the new tuplebuf */
4722 change->data.tp.oldtuple->t_data =
4723 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4724
4725 /* restore tuple data itself */
4727 data += tuplelen;
4728 }
4729
4730 if (change->data.tp.newtuple)
4731 {
4732 /* here, data might not be suitably aligned! */
4734
4736 sizeof(uint32));
4737
4738 change->data.tp.newtuple =
4740
4741 /* restore ->tuple */
4742 memcpy(change->data.tp.newtuple, data,
4743 sizeof(HeapTupleData));
4744 data += sizeof(HeapTupleData);
4745
4746 /* reset t_data pointer into the new tuplebuf */
4747 change->data.tp.newtuple->t_data =
4748 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4749
4750 /* restore tuple data itself */
4752 data += tuplelen;
4753 }
4754
4755 break;
4757 {
4758 Size prefix_size;
4759
4760 /* read prefix */
4761 memcpy(&prefix_size, data, sizeof(Size));
4762 data += sizeof(Size);
4763 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4764 prefix_size);
4765 memcpy(change->data.msg.prefix, data, prefix_size);
4766 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4767 data += prefix_size;
4768
4769 /* read the message */
4770 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4771 data += sizeof(Size);
4772 change->data.msg.message = MemoryContextAlloc(rb->context,
4773 change->data.msg.message_size);
4774 memcpy(change->data.msg.message, data,
4775 change->data.msg.message_size);
4776 data += change->data.msg.message_size;
4777
4778 break;
4779 }
4781 {
4783 change->data.inval.ninvalidations;
4784
4785 change->data.inval.invalidations =
4786 MemoryContextAlloc(rb->context, inval_size);
4787
4788 /* read the message */
4790
4791 break;
4792 }
4794 {
4797 Size size;
4798
4799 oldsnap = (Snapshot) data;
4800
4801 size = sizeof(SnapshotData) +
4802 sizeof(TransactionId) * oldsnap->xcnt +
4803 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4804
4805 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4806
4807 newsnap = change->data.snapshot;
4808
4809 memcpy(newsnap, data, size);
4810 newsnap->xip = (TransactionId *)
4811 (((char *) newsnap) + sizeof(SnapshotData));
4812 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4813 newsnap->copied = true;
4814 break;
4815 }
4816 /* the base struct contains all the data, easy peasy */
4818 {
4819 Oid *relids;
4820
4821 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4822 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4823 change->data.truncate.relids = relids;
4824
4825 break;
4826 }
4831 break;
4832 }
4833
4834 dlist_push_tail(&txn->changes, &change->node);
4835 txn->nentries_mem++;
4836
4837 /*
4838 * Update memory accounting for the restored change. We need to do this
4839 * although we don't check the memory limit when restoring the changes in
4840 * this branch (we only do that when initially queueing the changes after
4841 * decoding), because we will release the changes later, and that will
4842 * update the accounting too (subtracting the size from the counters). And
4843 * we don't want to underflow there.
4844 */
4845 ReorderBufferChangeMemoryUpdate(rb, change, NULL, true,
4846 ReorderBufferChangeSize(change));
4847}
4848
4849/*
4850 * Remove all on-disk data stored for the passed-in transaction.
4851 */
4852static void
4853ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
4854{
4855 XLogSegNo first;
4856 XLogSegNo cur;
4857 XLogSegNo last;
4858
4861
4864
4865 /* iterate over all possible filenames, and delete them */
4866 for (cur = first; cur <= last; cur++)
4867 {
4868 char path[MAXPGPATH];
4869
4871 if (unlink(path) != 0 && errno != ENOENT)
4872 ereport(ERROR,
4874 errmsg("could not remove file \"%s\": %m", path)));
4875 }
4876}
4877
4878/*
4879 * Remove any leftover serialized reorder buffers from a slot directory after a
4880 * prior crash or decoding session exit.
4881 */
4882static void
4883ReorderBufferCleanupSerializedTXNs(const char *slotname)
4884{
4885 DIR *spill_dir;
4886 struct dirent *spill_de;
4887 struct stat statbuf;
4888 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4889
4890 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4891
4892 /* we're only handling directories here, skip if it's not ours */
4893 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4894 return;
4895
4896 spill_dir = AllocateDir(path);
4897 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4898 {
4899 /* only look at names that can be ours */
4900 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4901 {
4902 snprintf(path, sizeof(path),
4903 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4904 spill_de->d_name);
4905
4906 if (unlink(path) != 0)
4907 ereport(ERROR,
4909 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4910 path, PG_REPLSLOT_DIR, slotname)));
4911 }
4912 }
4914}
4915
4916/*
4917 * Given a replication slot, transaction ID and segment number, fill in the
4918 * corresponding spill file into 'path', which is a caller-owned buffer of size
4919 * at least MAXPGPATH.
4920 */
4921static void
4922ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid,
4923 XLogSegNo segno)
4924{
4926
4928
4929 snprintf(path, MAXPGPATH, "%s/%s/xid-%u-lsn-%X-%X.spill",
4932 xid, LSN_FORMAT_ARGS(recptr));
4933}
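As a concrete illustration of the format assembled above, a hypothetical slot named "myslot" spilling transaction 752 from the WAL segment that starts at LSN 0/1000000 would use a path along these lines (example values only; the LSN part is the segment start):

/* Example only: pg_replslot/myslot/xid-752-lsn-0-1000000.spill */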
4934
4935/*
4936 * Delete all data spilled to disk after we've restarted/crashed. It will be
4937 * recreated when the respective slots are reused.
4938 */
4939void
4940StartupReorderBuffer(void)
4941{
4943 struct dirent *logical_de;
4944
4947 {
4948 if (strcmp(logical_de->d_name, ".") == 0 ||
4949 strcmp(logical_de->d_name, "..") == 0)
4950 continue;
4951
4952 /* if it cannot be a slot, skip the directory */
4953 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4954 continue;
4955
4956 /*
4957 * ok, has to be a surviving logical slot, iterate and delete
4958 * everything starting with xid-*
4959 */
4961 }
4963}
4964
4965/* ---------------------------------------
4966 * toast reassembly support
4967 * ---------------------------------------
4968 */
4969
4970/*
4971 * Initialize per tuple toast reconstruction support.
4972 */
4973static void
4974ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
4975{
4976 HASHCTL hash_ctl;
4977
4978 Assert(txn->toast_hash == NULL);
4979
4980 hash_ctl.keysize = sizeof(Oid);
4981 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4982 hash_ctl.hcxt = rb->context;
4983 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4985}
4986
4987/*
4988 * Per toast-chunk handling for toast reconstruction
4989 *
4990 * Appends a toast chunk so we can reconstruct it when the tuple "owning" the
4991 * toasted Datum comes along.
4992 */
4993static void
4994ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn,
4995 Relation relation, ReorderBufferChange *change)
4996{
4999 bool found;
5001 bool isnull;
5002 Pointer chunk;
5003 TupleDesc desc = RelationGetDescr(relation);
5004 Oid chunk_id;
5006
5007 if (txn->toast_hash == NULL)
5009
5010 Assert(IsToastRelation(relation));
5011
5012 newtup = change->data.tp.newtuple;
5013 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5014 Assert(!isnull);
5015 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5016 Assert(!isnull);
5017
5019 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5020
5021 if (!found)
5022 {
5023 Assert(ent->chunk_id == chunk_id);
5024 ent->num_chunks = 0;
5025 ent->last_chunk_seq = 0;
5026 ent->size = 0;
5027 ent->reconstructed = NULL;
5028 dlist_init(&ent->chunks);
5029
5030 if (chunk_seq != 0)
5031 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5032 chunk_seq, chunk_id);
5033 }
5034 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5035 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5036 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5037
5038 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5039 Assert(!isnull);
5040
5041 /* calculate size so we can allocate the right size at once later */
5044 else if (VARATT_IS_SHORT(chunk))
5045 /* could happen due to heap_form_tuple doing its thing */
5047 else
5048 elog(ERROR, "unexpected type of toast chunk");
5049
5050 ent->size += chunksize;
5051 ent->last_chunk_seq = chunk_seq;
5052 ent->num_chunks++;
5053 dlist_push_tail(&ent->chunks, &change->node);
5054}
5055
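The fastgetattr() calls above depend on the fixed column order of a TOAST relation; the comment-only sketch below summarizes what each attribute supplies and how the chunks are later stitched together.

/*
 * Sketch (illustrative, not part of reorderbuffer.c):
 *
 *   attribute 1: chunk_id   -- Oid identifying the toasted value
 *   attribute 2: chunk_seq  -- 0, 1, 2, ... consecutive per value
 *   attribute 3: chunk data -- varlena piece of the value
 *
 * Each decoded insert into the TOAST relation appends its change to the
 * per-chunk_id entry in txn->toast_hash; ReorderBufferToastReplace() then
 * concatenates the pieces in chunk_seq order into ent->reconstructed.
 */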
5056/*
5057 * Rejigger change->newtuple to point to in-memory toast tuples instead of
5058 * on-disk toast tuples that may no longer exist (think DROP TABLE or VACUUM).
5059 *
5060 * We cannot replace unchanged toast tuples though, so those will still point
5061 * to on-disk toast data.
5062 *
5063 * While updating the existing change with detoasted tuple data, we need to
5064 * update the memory accounting info, because the change size will differ.
5065 * Otherwise the accounting may get out of sync, triggering serialization
5066 * at unexpected times.
5067 *
5068 * We simply subtract size of the change before rejiggering the tuple, and
5069 * then add the new size. This makes it look like the change was removed
5070 * and then added back, except it only tweaks the accounting info.
5071 *
5072 * In particular it can't trigger serialization, which would be pointless
5073 * anyway as it happens during commit processing right before handing
5074 * the change to the output plugin.
5075 */
5076static void
5077ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
5078 Relation relation, ReorderBufferChange *change)
5079{
5080 TupleDesc desc;
5081 int natt;
5082 Datum *attrs;
5083 bool *isnull;
5084 bool *free;
5086 Relation toast_rel;
5088 MemoryContext oldcontext;
5090 Size old_size;
5091
5092 /* no toast tuples changed */
5093 if (txn->toast_hash == NULL)
5094 return;
5095
5096 /*
5097 * We're going to modify the size of the change. So, to make sure the
5098 * accounting is correct we record the current change size and then after
5099 * re-computing the change we'll subtract the recorded size and then
5100 * re-add the new change size at the end. We don't immediately subtract
5101 * the old size because if there is any error before we add the new size,
5102 * we will release the changes and that will update the accounting info
5103 * (subtracting the size from the counters). And we don't want to
5104 * underflow there.
5105 */
5107
5108 oldcontext = MemoryContextSwitchTo(rb->context);
5109
5110 /* we should only have toast tuples in an INSERT or UPDATE */
5111 Assert(change->data.tp.newtuple);
5112
5113 desc = RelationGetDescr(relation);
5114
5115 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5116 if (!RelationIsValid(toast_rel))
5117 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5118 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5119
5120 toast_desc = RelationGetDescr(toast_rel);
5121
5122 /* should we allocate from stack instead? */
5123 attrs = palloc0_array(Datum, desc->natts);
5124 isnull = palloc0_array(bool, desc->natts);
5125 free = palloc0_array(bool, desc->natts);
5126
5127 newtup = change->data.tp.newtuple;
5128
5129 heap_deform_tuple(newtup, desc, attrs, isnull);
5130
5131 for (natt = 0; natt < desc->natts; natt++)
5132 {
5135 struct varlena *varlena;
5136
5137 /* va_rawsize is the size of the original datum -- including header */
5138 struct varatt_external toast_pointer;
5140 struct varlena *new_datum = NULL;
5141 struct varlena *reconstructed;
5142 dlist_iter it;
5143 Size data_done = 0;
5144
5145 if (attr->attisdropped)
5146 continue;
5147
5148 /* not a varlena datatype */
5149 if (attr->attlen != -1)
5150 continue;
5151
5152 /* no data */
5153 if (isnull[natt])
5154 continue;
5155
5156 /* ok, we know we have a toast datum */
5157 varlena = (struct varlena *) DatumGetPointer(attrs[natt]);
5158
5159 /* no need to do anything if the tuple isn't external */
5161 continue;
5162
5163 VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
5164
5165 /*
5166 * Check whether the toast tuple changed, replace if so.
5167 */
5170 &toast_pointer.va_valueid,
5171 HASH_FIND,
5172 NULL);
5173 if (ent == NULL)
5174 continue;
5175
5176 new_datum =
5178
5179 free[natt] = true;
5180
5181 reconstructed = palloc0(toast_pointer.va_rawsize);
5182
5183 ent->reconstructed = reconstructed;
5184
5185 /* stitch toast tuple back together from its parts */
5186 dlist_foreach(it, &ent->chunks)
5187 {
5188 bool cisnull;
5191 Pointer chunk;
5192
5194 ctup = cchange->data.tp.newtuple;
5196
5197 Assert(!cisnull);
5200
5201 memcpy(VARDATA(reconstructed) + data_done,
5202 VARDATA(chunk),
5205 }
5206 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5207
5208 /* make sure its marked as compressed or not */
5209 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5210 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5211 else
5212 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5213
5215 redirect_pointer.pointer = reconstructed;
5216
5219 sizeof(redirect_pointer));
5220
5222 }
5223
5224 /*
5225 * Build tuple in separate memory & copy tuple back into the tuplebuf
5226 * passed to the output plugin. We can't directly heap_fill_tuple() into
5227 * the tuplebuf because attrs[] will point back into the current content.
5228 */
5229 tmphtup = heap_form_tuple(desc, attrs, isnull);
5230 Assert(newtup->t_len <= MaxHeapTupleSize);
5231 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5232
5233 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5234 newtup->t_len = tmphtup->t_len;
5235
5236 /*
5237 * free resources we won't further need, more persistent stuff will be
5238 * free'd in ReorderBufferToastReset().
5239 */
5240 RelationClose(toast_rel);
5241 pfree(tmphtup);
5242 for (natt = 0; natt < desc->natts; natt++)
5243 {
5244 if (free[natt])
5246 }
5247 pfree(attrs);
5248 pfree(free);
5249 pfree(isnull);
5250
5251 MemoryContextSwitchTo(oldcontext);
5252
5253 /* subtract the old change size */
5255 /* now add the change back, with the correct size */
5257 ReorderBufferChangeSize(change));
5258}
5259
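The redirect installed near the end of the loop above replaces the on-disk TOAST pointer with an in-memory indirection; a sketch of the resulting datum, assuming the standard varatt_indirect layout from varatt.h:

/*
 * Sketch of the rewritten attribute (illustrative only):
 *
 *   [ varlena header tagged VARTAG_INDIRECT ]
 *   [ struct varatt_indirect { struct varlena *pointer; } ]
 *                                      |
 *                                      +--> ent->reconstructed (palloc'd copy)
 *
 * Later detoasting follows the pointer instead of fetching chunks from the
 * TOAST table, which may already have been rewritten or dropped.
 */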
5260/*
5261 * Free all resources allocated for toast reconstruction.
5262 */
5263static void
5264ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
5265{
5268
5269 if (txn->toast_hash == NULL)
5270 return;
5271
5272 /* sequentially walk over the hash and free everything */
5275 {
5277
5278 if (ent->reconstructed != NULL)
5279 pfree(ent->reconstructed);
5280
5281 dlist_foreach_modify(it, &ent->chunks)
5282 {
5283 ReorderBufferChange *change =
5285
5286 dlist_delete(&change->node);
5287 ReorderBufferFreeChange(rb, change, true);
5288 }
5289 }
5290
5292 txn->toast_hash = NULL;
5293}
5294
5295
5296/* ---------------------------------------
5297 * Visibility support for logical decoding
5298 *
5299 *
5300 * Lookup actual cmin/cmax values when using decoding snapshot. We can't
5301 * always rely on stored cmin/cmax values because of two scenarios:
5302 *
5303 * * A tuple got changed multiple times during a single transaction and thus
5304 * has got a combo CID. Combo CIDs are only valid for the duration of a
5305 * single transaction.
5306 * * A tuple with a cmin but no cmax (and thus no combo CID) got
5307 * deleted/updated in another transaction than the one which created it
5308 * which we are looking at right now. As only one of cmin, cmax or combo CID
5309 * is actually stored in the heap we don't have access to the value we
5310 * need anymore.
5311 *
5312 * To resolve those problems we have a per-transaction hash of (cmin,
5313 * cmax) tuples keyed by (relfilelocator, ctid) which contains the actual
5314 * (cmin, cmax) values. That also takes care of combo CIDs by simply
5315 * not caring about them at all. As we have the real cmin/cmax values
5316 * combo CIDs aren't interesting.
5317 *
5318 * As we only care about catalog tuples here the overhead of this
5319 * hashtable should be acceptable.
5320 *
5321 * Heap rewrites complicate this a bit, check rewriteheap.c for
5322 * details.
5323 * -------------------------------------------------------------------------
5324 */
5325
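A minimal usage sketch of the lookup described above is given below; the wrapper name is hypothetical, and the real callers live in the historic-snapshot visibility code rather than in this file.

/* Hypothetical illustration only; tuplecids is the hash described above. */
static void
example_catalog_cid_lookup(HTAB *tuplecids, Snapshot historic_snap,
						   HeapTuple htup, Buffer buf)
{
	CommandId	cmin;
	CommandId	cmax;

	if (ResolveCminCmaxDuringDecoding(tuplecids, historic_snap,
									  htup, buf, &cmin, &cmax))
	{
		/*
		 * cmin/cmax are the values remembered at decode time, so a visibility
		 * check can compare them against historic_snap->curcid instead of
		 * trusting the (possibly combo) CID stored in the tuple header.
		 */
		elog(DEBUG3, "resolved cmin %u cmax %u", cmin, cmax);
	}
	else
	{
		/*
		 * Unresolved: possible while streaming an in-progress transaction;
		 * callers treat the CID as belonging to a future command.
		 */
	}
}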
5326/* struct for sorting mapping files by LSN efficiently */
5327typedef struct RewriteMappingFile
5328{
5329 XLogRecPtr lsn;
5330 char fname[MAXPGPATH];
5331} RewriteMappingFile;
5332
5333#ifdef NOT_USED
5334static void
5335DisplayMapping(HTAB *tuplecid_data)
5336{
5339
5342 {
5343 elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
5344 ent->key.rlocator.dbOid,
5345 ent->key.rlocator.spcOid,
5346 ent->key.rlocator.relNumber,
5347 ItemPointerGetBlockNumber(&ent->key.tid),
5349 ent->cmin,
5350 ent->cmax
5351 );
5352 }
5353}
5354#endif
5355
5356/*
5357 * Apply a single mapping file to tuplecid_data.
5358 *
5359 * The mapping file has to have been verified to be a) committed b) for our
5360 * transaction c) applied in LSN order.
5361 */
5362static void
5363ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname)
5364{
5365 char path[MAXPGPATH];
5366 int fd;
5367 int readBytes;
5369
5370 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5372 if (fd < 0)
5373 ereport(ERROR,
5375 errmsg("could not open file \"%s\": %m", path)));
5376
5377 while (true)
5378 {
5382 bool found;
5383
5384 /* be careful about padding */
5385 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5386
5387 /* read all mappings till the end of the file */
5389 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5391
5392 if (readBytes < 0)
5393 ereport(ERROR,
5395 errmsg("could not read file \"%s\": %m",
5396 path)));
5397 else if (readBytes == 0) /* EOF */
5398 break;
5399 else if (readBytes != sizeof(LogicalRewriteMappingData))
5400 ereport(ERROR,
5402 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5403 path, readBytes,
5404 (int32) sizeof(LogicalRewriteMappingData))));
5405
5406 key.rlocator = map.old_locator;
5408 &key.tid);
5409
5410
5413
5414 /* no existing mapping, no need to update */
5415 if (!ent)
5416 continue;
5417
5418 key.rlocator = map.new_locator;
5420 &key.tid);
5421
5423 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5424
5425 if (found)
5426 {
5427 /*
5428 * Make sure the existing mapping makes sense. We sometime update
5429 * old records that did not yet have a cmax (e.g. pg_class' own
5430 * entry while rewriting it) during rewrites, so allow that.
5431 */
5432 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5433 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5434 }
5435 else
5436 {
5437 /* update mapping */
5438 new_ent->cmin = ent->cmin;
5439 new_ent->cmax = ent->cmax;
5440 new_ent->combocid = ent->combocid;
5441 }
5442 }
5443
5444 if (CloseTransientFile(fd) != 0)
5445 ereport(ERROR,
5447 errmsg("could not close file \"%s\": %m", path)));
5448}
5449
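Each record consumed by the loop above is one fixed-size LogicalRewriteMappingData entry; the comment-only sketch below lists the fields this code actually uses (see rewriteheap.h for the authoritative definition).

/*
 * Sketch of the fields read from each mapping record (illustrative):
 *
 *   old_locator / old_tid  -- where the catalog tuple lived before the rewrite
 *   new_locator / new_tid  -- where the same tuple lives afterwards
 *
 * The loop looks up (old_locator, old_tid) in tuplecid_data and, if present,
 * re-enters the remembered cmin/cmax under (new_locator, new_tid), so the
 * decoding-time CID information survives CLUSTER / VACUUM FULL rewrites.
 */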
5450
5451/*
5452 * Check whether the TransactionId 'xid' is in the pre-sorted array 'xip'.
5453 */
5454static bool
5455TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
5456{
5457 return bsearch(&xid, xip, num,
5458 sizeof(TransactionId), xidComparator) != NULL;
5459}
5460
5461/*
5462 * list_sort() comparator for sorting RewriteMappingFiles in LSN order.
5463 */
5464static int
5465file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
5466{
5467 RewriteMappingFile *a = (RewriteMappingFile *) lfirst(a_p);
5468 RewriteMappingFile *b = (RewriteMappingFile *) lfirst(b_p);
5469
5470 return pg_cmp_u64(a->lsn, b->lsn);
5471}
5472
5473/*
5474 * Apply any existing logical remapping files if there are any targeted at our
5475 * transaction for relid.
5476 */
5477static void
5478UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
5479{
5481 struct dirent *mapping_de;
5482 List *files = NIL;
5483 ListCell *file;
5484 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5485
5488 {
5489 Oid f_dboid;
5490 Oid f_relid;
5494 uint32 f_hi,
5495 f_lo;
5497
5498 if (strcmp(mapping_de->d_name, ".") == 0 ||
5499 strcmp(mapping_de->d_name, "..") == 0)
5500 continue;
5501
5502 /* Ignore files that aren't ours */
5503 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5504 continue;
5505
5506 if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
5507 &f_dboid, &f_relid, &f_hi, &f_lo,
5508 &f_mapped_xid, &f_create_xid) != 6)
5509 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5510
5511 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5512
5513 /* mapping for another database */
5514 if (f_dboid != dboid)
5515 continue;
5516
5517 /* mapping for another relation */
5518 if (f_relid != relid)
5519 continue;
5520
5521 /* did the creating transaction abort? */
5523 continue;
5524
5525 /* not for our transaction */
5526 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5527 continue;
5528
5529 /* ok, relevant, queue for apply */
5531 f->lsn = f_lsn;
5532 strcpy(f->fname, mapping_de->d_name);
5533 files = lappend(files, f);
5534 }
5536
5537 /* sort files so we apply them in LSN order */
5539
5540 foreach(file, files)
5541 {
5543
5544 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5545 snapshot->subxip[0]);
5547 pfree(f);
5548 }
5549}
5550
5551/*
5552 * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on
5553 * combo CIDs.
5554 */
5555bool
5556ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data,
5557 Snapshot snapshot,
5558 HeapTuple htup, Buffer buffer,
5559 CommandId *cmin, CommandId *cmax)
5560{
5563 ForkNumber forkno;
5564 BlockNumber blockno;
5565 bool updated_mapping = false;
5566
5567 /*
5568 * Return unresolved if tuplecid_data is not valid. That's because when
5569 * streaming in-progress transactions we may run into tuples with the CID
5570 * before actually decoding them. Think e.g. about INSERT followed by
5571 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5572 * INSERT. So in such cases, we assume the CID is from the future
5573 * command.
5574 */
5575 if (tuplecid_data == NULL)
5576 return false;
5577
5578 /* be careful about padding */
5579 memset(&key, 0, sizeof(key));
5580
5581 Assert(!BufferIsLocal(buffer));
5582
5583 /*
5584 * get relfilelocator from the buffer, no convenient way to access it
5585 * other than that.
5586 */
5587 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5588
5589 /* tuples can only be in the main fork */
5590 Assert(forkno == MAIN_FORKNUM);
5591 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5592
5593 ItemPointerCopy(&htup->t_self,
5594 &key.tid);
5595
5596restart:
5599
5600 /*
5601 * failed to find a mapping, check whether the table was rewritten and
5602 * apply mapping if so, but only do that once - there can be no new
5603 * mappings while we are in here since we have to hold a lock on the
5604 * relation.
5605 */
5606 if (ent == NULL && !updated_mapping)
5607 {
5609 /* now check but don't update for a mapping again */
5610 updated_mapping = true;
5611 goto restart;
5612 }
5613 else if (ent == NULL)
5614 return false;
5615
5616 if (cmin)
5617 *cmin = ent->cmin;
5618 if (cmax)
5619 *cmax = ent->cmax;
5620 return true;
5621}
5622
5623/*
5624 * Count invalidation messages of specified transaction.
5625 *
5626 * Returns number of messages, and msgs is set to the pointer of the linked
5627 * list for the messages.
5628 */
5629uint32
5630ReorderBufferGetInvalidations(ReorderBuffer *rb, TransactionId xid,
5631 SharedInvalidationMessage **msgs)
5632{
5633 ReorderBufferTXN *txn;
5634
5635 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5636 false);
5637
5638 if (txn == NULL)
5639 return 0;
5640
5641 *msgs = txn->invalidations;
5642
5643 return txn->ninvalidations;
5644}
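A minimal sketch of how a caller might consume this result, assuming the common pattern of replaying each message locally (the function name below is hypothetical; the real consumers are in the commit-decoding path):

/* Hypothetical illustration only. */
static void
replay_txn_invalidations(ReorderBuffer *rb, TransactionId xid)
{
	SharedInvalidationMessage *msgs;
	uint32		nmsgs = ReorderBufferGetInvalidations(rb, xid, &msgs);

	for (uint32 i = 0; i < nmsgs; i++)
		LocalExecuteInvalidationMessage(&msgs[i]);
}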
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
uint32 BlockNumber
Definition block.h:31
static int32 next
Definition blutils.c:225
static void cleanup(void)
Definition bootstrap.c:717
int Buffer
Definition buf.h:23
#define BufferIsLocal(buffer)
Definition buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4377
#define NameStr(name)
Definition c.h:775
#define InvalidCommandId
Definition c.h:693
#define VARHDRSZ
Definition c.h:721
#define Assert(condition)
Definition c.h:883
#define PG_BINARY
Definition c.h:1281
#define FirstCommandId
Definition c.h:692
int32_t int32
Definition c.h:552
uint64_t uint64
Definition c.h:557
#define unlikely(x)
Definition c.h:422
uint32_t uint32
Definition c.h:556
void * Pointer
Definition c.h:547
uint32 CommandId
Definition c.h:690
uint32 TransactionId
Definition c.h:676
size_t Size
Definition c.h:629
bool IsToastRelation(Relation relation)
Definition catalog.c:206
bool IsSharedRelation(Oid relationId)
Definition catalog.c:304
int64 TimestampTz
Definition timestamp.h:39
#define INDIRECT_POINTER_SIZE
Definition detoast.h:34
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition detoast.h:22
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
struct cursor * cur
Definition ecpg.c:29
void FreeErrorData(ErrorData *edata)
Definition elog.c:1835
int errcode_for_file_access(void)
Definition elog.c:886
ErrorData * CopyErrorData(void)
Definition elog.c:1763
void FlushErrorState(void)
Definition elog.c:1884
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define PG_RE_THROW()
Definition elog.h:405
#define DEBUG3
Definition elog.h:28
#define PG_TRY(...)
Definition elog.h:372
#define DEBUG2
Definition elog.h:29
#define PG_END_TRY(...)
Definition elog.h:397
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define PG_CATCH(...)
Definition elog.h:382
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
int FreeDir(DIR *dir)
Definition fd.c:3005
int CloseTransientFile(int fd)
Definition fd.c:2851
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition fd.c:2968
void FileClose(File file)
Definition fd.c:1962
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1559
DIR * AllocateDir(const char *dirname)
Definition fd.c:2887
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2953
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2674
static ssize_t FileRead(File file, void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:214
int File
Definition fd.h:51
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition generation.c:162
Oid MyDatabaseId
Definition globals.c:94
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1117
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition heaptuple.c:1346
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_CONTEXT
Definition hsearch.h:102
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
#define HEAPTUPLESIZE
Definition htup.h:73
HeapTupleData * HeapTuple
Definition htup.h:71
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
#define SizeofHeapTupleHeader
#define MaxHeapTupleSize
static Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static bool dlist_has_next(const dlist_head *head, const dlist_node *node)
Definition ilist.h:503
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition ilist.h:709
static void dlist_insert_before(dlist_node *before, dlist_node *node)
Definition ilist.h:393
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static dlist_node * dlist_next_node(dlist_head *head, dlist_node *node)
Definition ilist.h:537
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
Definition inval.c:823
void InvalidateSystemCaches(void)
Definition inval.c:916
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_sort(List *list, list_sort_comparator cmp)
Definition list.c:1674
void UpdateDecodingStats(LogicalDecodingContext *ctx)
Definition logical.c:1943
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1266
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define SLAB_DEFAULT_BLOCK_SIZE
Definition memutils.h:189
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
pairingheap * pairingheap_allocate(pairingheap_comparator compare, void *arg)
Definition pairingheap.c:42
pairingheap_node * pairingheap_first(pairingheap *heap)
#define pairingheap_container(type, membername, ptr)
Definition pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition pairingheap.h:51
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void * arg
#define MAXPGPATH
const void * data
#define lfirst(lc)
Definition pg_list.h:172
#define NIL
Definition pg_list.h:68
#define sprintf
Definition port.h:262
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212
#define InvalidOid
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1404
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsValid(relation)
Definition rel.h:489
Relation RelationIdGetRelation(Oid relationId)
Definition relcache.c:2094
void RelationClose(Relation relation)
Definition relcache.c:2215
Oid RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
ForkNumber
Definition relpath.h:56
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146
static int file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
void ReorderBufferFreeRelids(ReorderBuffer *rb, Oid *relids)
void ReorderBufferFreeChange(ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static void ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
static void ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
static ReorderBufferTXN * ReorderBufferLargestTXN(ReorderBuffer *rb)
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
static void ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname)
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
static bool ReorderBufferCanStartStreaming(ReorderBuffer *rb)
static void ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
void ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
static int ReorderBufferIterCompare(Datum a, Datum b, void *arg)
static void ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
bool ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferFreeTupleBuf(HeapTuple tuple)
void ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
static void ReorderBufferReplay(ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
void ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, char *gid)
uint32 ReorderBufferGetInvalidations(ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
void ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
void ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
TransactionId * ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb)
static void ReorderBufferSaveTXNSnapshot(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
#define IsSpecInsert(action)
static Size ReorderBufferChangeSize(ReorderBufferChange *change)
int logical_decoding_work_mem
static void AssertChangeLsnOrder(ReorderBufferTXN *txn)
static bool ReorderBufferCanStream(ReorderBuffer *rb)
static int ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg)
static void ReorderBufferApplyChange(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
void ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid)
static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
void ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
int debug_logical_replication_streaming
void ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
#define IsInsertOrUpdate(action)
static void ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
void ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
static void ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
static void ReorderBufferIterTXNFinish(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
#define CHANGES_THRESHOLD
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN(ReorderBuffer *rb)
static bool ReorderBufferCheckAndTruncateAbortedTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
HeapTuple ReorderBufferAllocTupleBuf(ReorderBuffer *rb, Size tuple_len)
void ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
static void AssertTXNLsnOrder(ReorderBuffer *rb)
#define MAX_DISTR_INVAL_MSG_PER_TXN
static void ReorderBufferApplyMessage(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap)
static void ReorderBufferCleanupSerializedTXNs(const char *slotname)
ReorderBufferChange * ReorderBufferAllocChange(ReorderBuffer *rb)
void ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
static void SetupCheckXidLive(TransactionId xid)
static bool TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
static void ReorderBufferApplyTruncate(ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
static void ReorderBufferProcessPartialChange(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static void ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
static ReorderBufferTXN * ReorderBufferAllocTXN(ReorderBuffer *rb)
bool ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, RepOriginId origin_id, XLogRecPtr origin_lsn)
static void ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
static void ReorderBufferTransferSnapToParent(ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
static void ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
static ReorderBufferChange * ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
Oid * ReorderBufferAllocRelids(ReorderBuffer *rb, int nrelids)
static void ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
static void ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
static void ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
void ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
void ReorderBufferFree(ReorderBuffer *rb)
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
#define IsSpecConfirmOrAbort(action)
static const Size max_changes_in_memory
void StartupReorderBuffer(void)
void ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
static ReorderBufferTXN * ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
static void ReorderBufferMaybeMarkTXNStreamed(ReorderBuffer *rb, ReorderBufferTXN *txn)
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
static void ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
#define rbtxn_is_committed(txn)
#define rbtxn_has_streamable_change(txn)
#define rbtxn_has_catalog_changes(txn)
@ DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE
@ DEBUG_LOGICAL_REP_STREAMING_BUFFERED
#define RBTXN_PREPARE_STATUS_MASK
#define rbtxn_is_serialized_clear(txn)
#define RBTXN_IS_STREAMED
#define rbtxn_is_prepared(txn)
#define RBTXN_HAS_PARTIAL_CHANGE
#define rbtxn_is_streamed(txn)
#define RBTXN_SENT_PREPARE
#define rbtxn_is_toptxn(txn)
#define rbtxn_get_toptxn(txn)
#define rbtxn_is_known_subxact(txn)
#define rbtxn_is_subtxn(txn)
#define RBTXN_HAS_CATALOG_CHANGES
#define RBTXN_IS_COMMITTED
#define PG_LOGICAL_MAPPINGS_DIR
#define RBTXN_DISTR_INVAL_OVERFLOWED
#define RBTXN_IS_SERIALIZED_CLEAR
#define rbtxn_sent_prepare(txn)
#define RBTXN_IS_PREPARED
#define rbtxn_distr_inval_overflowed(txn)
#define RBTXN_SKIPPED_PREPARE
#define RBTXN_HAS_STREAMABLE_CHANGE
@ REORDER_BUFFER_CHANGE_INVALIDATION
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
@ REORDER_BUFFER_CHANGE_MESSAGE
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT
@ REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID
@ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
@ REORDER_BUFFER_CHANGE_TRUNCATE
@ REORDER_BUFFER_CHANGE_DELETE
@ REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT
#define rbtxn_is_aborted(txn)
#define RBTXN_IS_SERIALIZED
#define rbtxn_is_serialized(txn)
#define RBTXN_IS_ABORTED
#define RBTXN_IS_SUBXACT
#define rbtxn_has_partial_change(txn)
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
#define LOGICAL_REWRITE_FORMAT
Definition rewriteheap.h:54
MemoryContext SlabContextCreate(MemoryContext parent, const char *name, Size blockSize, Size chunkSize)
Definition slab.c:322
ReplicationSlot * MyReplicationSlot
Definition slot.c:148
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
Definition slot.c:266
#define PG_REPLSLOT_DIR
Definition slot.h:21
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition snapbuild.c:328
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition snapbuild.c:304
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition snapbuild.c:277
@ SNAPBUILD_CONSISTENT
Definition snapbuild.h:50
void TeardownHistoricSnapshot(bool is_error)
Definition snapmgr.c:1685
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition snapmgr.c:1669
static HTAB * tuplecid_data
Definition snapmgr.c:163
struct SnapshotData * Snapshot
Definition snapshot.h:117
#define free(a)
bool attisdropped
Definition tupdesc.h:77
Definition dirent.c:26
Size keysize
Definition hsearch.h:75
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
Definition pg_list.h:54
XLogReaderState * reader
Definition logical.h:42
struct SnapBuild * snapshot_builder
Definition logical.h:44
ItemPointerData new_tid
Definition rewriteheap.h:40
RelFileLocator old_locator
Definition rewriteheap.h:37
ItemPointerData old_tid
Definition rewriteheap.h:39
RelFileLocator new_locator
Definition rewriteheap.h:38
RelFileNumber relNumber
Form_pg_class rd_rel
Definition rel.h:111
struct ReorderBufferChange::@113::@117 tuplecid
ReorderBufferChangeType action
struct ReorderBufferChange::@113::@115 truncate
RelFileLocator rlocator
ItemPointerData tid
union ReorderBufferChange::@113 data
struct ReorderBufferChange::@113::@118 inval
struct ReorderBufferChange::@113::@114 tp
struct ReorderBufferTXN * txn
RelFileLocator locator
RepOriginId origin_id
struct ReorderBufferChange::@113::@116 msg
SharedInvalidationMessage * invalidations
ReorderBufferChange change
ReorderBufferChange * change
ReorderBufferTXN * txn
XLogRecPtr restart_decoding_lsn
pairingheap_node txn_node
TimestampTz commit_time
XLogRecPtr base_snapshot_lsn
TransactionId toplevel_xid
dlist_node catchange_node
SharedInvalidationMessage * invalidations
RepOriginId origin_id
dlist_head tuplecids
XLogRecPtr first_lsn
TimestampTz abort_time
XLogRecPtr final_lsn
void * output_plugin_private
uint32 ninvalidations_distributed
XLogRecPtr origin_lsn
TimestampTz prepare_time
TransactionId xid
dlist_node base_snapshot_node
SharedInvalidationMessage * invalidations_distributed
dlist_head txns_by_base_snapshot_lsn
MemoryContext context
dclist_head catchange_txns
MemoryContext change_context
ReorderBufferTXN * by_txn_last_txn
TransactionId by_txn_last_xid
MemoryContext tup_context
dlist_head toplevel_by_lsn
pairingheap * txn_heap
MemoryContext txn_context
XLogRecPtr current_restart_decoding_lsn
ReplicationSlotPersistentData data
Definition slot.h:210
char fname[MAXPGPATH]
TransactionId xmin
Definition snapshot.h:153
int32 subxcnt
Definition snapshot.h:177
CommandId curcid
Definition snapshot.h:183
uint32 xcnt
Definition snapshot.h:165
TransactionId * subxip
Definition snapshot.h:176
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
Definition c.h:716
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:175
#define VARHDRSZ_SHORT
Definition varatt.h:278
static bool VARATT_IS_SHORT(const void *PTR)
Definition varatt.h:403
static void SET_VARSIZE_COMPRESSED(void *PTR, Size len)
Definition varatt.h:446
static Size VARATT_EXTERNAL_GET_EXTSIZE(struct varatt_external toast_pointer)
Definition varatt.h:507
static bool VARATT_IS_EXTENDED(const void *PTR)
Definition varatt.h:410
static bool VARATT_IS_EXTERNAL(const void *PTR)
Definition varatt.h:354
static char * VARDATA_EXTERNAL(const void *PTR)
Definition varatt.h:340
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARTAG_EXTERNAL(void *PTR, vartag_external tag)
Definition varatt.h:453
@ VARTAG_INDIRECT
Definition varatt.h:86
static bool VARATT_EXTERNAL_IS_COMPRESSED(struct varatt_external toast_pointer)
Definition varatt.h:536
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static Size VARSIZE_SHORT(const void *PTR)
Definition varatt.h:312
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
#define lstat(path, sb)
Definition win32_port.h:275
#define S_ISDIR(m)
Definition win32_port.h:315
bool IsTransactionOrTransactionBlock(void)
Definition xact.c:5011
void BeginInternalSubTransaction(const char *name)
Definition xact.c:4716
TransactionId CheckXidAlive
Definition xact.c:100
void RollbackAndReleaseCurrentSubTransaction(void)
Definition xact.c:4818
void StartTransactionCommand(void)
Definition xact.c:3080
TransactionId GetCurrentTransactionIdIfAny(void)
Definition xact.c:472
TransactionId GetCurrentTransactionId(void)
Definition xact.c:455
void AbortCurrentTransaction(void)
Definition xact.c:3472
int xidComparator(const void *arg1, const void *arg2)
Definition xid.c:152
int wal_segment_size
Definition xlog.c:146
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 RepOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52

◆ IsSpecConfirmOrAbort

#define IsSpecConfirmOrAbort (   action)
Value:

Definition at line 201 of file reorderbuffer.c.

◆ IsSpecInsert

#define IsSpecInsert (   action)
Value:

Definition at line 197 of file reorderbuffer.c.

◆ MAX_DISTR_INVAL_MSG_PER_TXN

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))

Definition at line 125 of file reorderbuffer.c.
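
The cap is purely arithmetic: a transaction may accumulate as many distributed invalidation messages as fit into 8MB. A minimal sketch of that computation (illustrative only, not part of reorderbuffer.c; the concrete count depends on sizeof(SharedInvalidationMessage) for the build, e.g. 524288 messages at 16 bytes each):

#include "postgres.h"
#include "storage/sinval.h"

/* Illustrative helper showing the same arithmetic as the macro. */
static Size
distributed_inval_cap(void)
{
	/* 8MB worth of SharedInvalidationMessage entries */
	return ((Size) 8 * 1024 * 1024) / sizeof(SharedInvalidationMessage);
}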

Typedef Documentation

◆ ReorderBufferDiskChange

◆ ReorderBufferIterTXNEntry

◆ ReorderBufferIterTXNState

◆ ReorderBufferToastEnt

◆ ReorderBufferTupleCidEnt

◆ ReorderBufferTupleCidKey

◆ ReorderBufferTXNByIdEnt

◆ RewriteMappingFile

◆ TXNEntryFile

Function Documentation

◆ ApplyLogicalMappingFile()

static void ApplyLogicalMappingFile ( HTAB *  tuplecid_data,
Oid  relid,
const char *  fname 
)
static

Definition at line 5364 of file reorderbuffer.c.

5365{
5366 char path[MAXPGPATH];
5367 int fd;
5368 int readBytes;
5370
5371 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5373 if (fd < 0)
5374 ereport(ERROR,
5376 errmsg("could not open file \"%s\": %m", path)));
5377
5378 while (true)
5379 {
5383 bool found;
5384
5385 /* be careful about padding */
5386 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5387
5388 /* read all mappings till the end of the file */
5390 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5392
5393 if (readBytes < 0)
5394 ereport(ERROR,
5396 errmsg("could not read file \"%s\": %m",
5397 path)));
5398 else if (readBytes == 0) /* EOF */
5399 break;
5400 else if (readBytes != sizeof(LogicalRewriteMappingData))
5401 ereport(ERROR,
5403 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5404 path, readBytes,
5405 (int32) sizeof(LogicalRewriteMappingData))));
5406
5407 key.rlocator = map.old_locator;
5409 &key.tid);
5410
5411
5414
5415 /* no existing mapping, no need to update */
5416 if (!ent)
5417 continue;
5418
5419 key.rlocator = map.new_locator;
5421 &key.tid);
5422
5424 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5425
5426 if (found)
5427 {
5428 /*
 5429 * Make sure the existing mapping makes sense. We sometimes update
5430 * old records that did not yet have a cmax (e.g. pg_class' own
5431 * entry while rewriting it) during rewrites, so allow that.
5432 */
5433 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5434 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5435 }
5436 else
5437 {
5438 /* update mapping */
5439 new_ent->cmin = ent->cmin;
5440 new_ent->cmax = ent->cmax;
5441 new_ent->combocid = ent->combocid;
5442 }
5443 }
5444
5445 if (CloseTransientFile(fd) != 0)
5446 ereport(ERROR,
5448 errmsg("could not close file \"%s\": %m", path)));
5449}

References Assert, CloseTransientFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), HASH_ENTER, HASH_FIND, hash_search(), InvalidCommandId, ItemPointerCopy(), MAXPGPATH, LogicalRewriteMappingData::new_locator, LogicalRewriteMappingData::new_tid, LogicalRewriteMappingData::old_locator, LogicalRewriteMappingData::old_tid, OpenTransientFile(), PG_BINARY, PG_LOGICAL_MAPPINGS_DIR, pgstat_report_wait_end(), pgstat_report_wait_start(), read, sprintf, and tuplecid_data.

Referenced by UpdateLogicalMappings().

◆ AssertChangeLsnOrder()

static void AssertChangeLsnOrder ( ReorderBufferTXN *  txn)
static

Definition at line 1013 of file reorderbuffer.c.

1014{
1015#ifdef USE_ASSERT_CHECKING
1016 dlist_iter iter;
1018
1019 dlist_foreach(iter, &txn->changes)
1020 {
1022
1024
1027 Assert(txn->first_lsn <= cur_change->lsn);
1028
1029 if (XLogRecPtrIsValid(txn->end_lsn))
1030 Assert(cur_change->lsn <= txn->end_lsn);
1031
1033
1034 prev_lsn = cur_change->lsn;
1035 }
1036#endif
1037}

References Assert, ReorderBufferTXN::changes, dlist_iter::cur, dlist_container, dlist_foreach, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::first_lsn, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit().

◆ AssertTXNLsnOrder()

static void AssertTXNLsnOrder ( ReorderBuffer *  rb)
static

Definition at line 942 of file reorderbuffer.c.

943{
944#ifdef USE_ASSERT_CHECKING
945 LogicalDecodingContext *ctx = rb->private_data;
946 dlist_iter iter;
949
950 /*
 951 * Skip the verification if we haven't yet reached the LSN at which we
 952 * start decoding the contents of transactions, because until we reach
 953 * that LSN we could have transactions that lack the association between
 954 * the top-level transaction and its subtransactions and consequently
 955 * have the same LSN. We don't guarantee this association until we try
 956 * to decode the actual contents of a transaction. The ordering of the
 957 * records prior to the start_decoding_at LSN should have been checked
 958 * before the restart.
959 */
961 return;
962
963 dlist_foreach(iter, &rb->toplevel_by_lsn)
964 {
966 iter.cur);
967
968 /* start LSN must be set */
969 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
970
971 /* If there is an end LSN, it must be higher than start LSN */
972 if (XLogRecPtrIsValid(cur_txn->end_lsn))
973 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
974
975 /* Current initial LSN must be strictly higher than previous */
978
979 /* known-as-subtxn txns must not be listed */
981
982 prev_first_lsn = cur_txn->first_lsn;
983 }
984
985 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
986 {
988 base_snapshot_node,
989 iter.cur);
990
991 /* base snapshot (and its LSN) must be set */
992 Assert(cur_txn->base_snapshot != NULL);
993 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
994
995 /* current LSN must be strictly higher than previous */
997 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
998
999 /* known-as-subtxn txns must not be listed */
1001
1002 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1003 }
1004#endif
1005}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, XLogReaderState::EndRecPtr, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, LogicalDecodingContext::reader, SnapBuildXactNeedsSkip(), LogicalDecodingContext::snapshot_builder, and XLogRecPtrIsValid.

Referenced by ReorderBufferAssignChild(), ReorderBufferGetOldestTXN(), ReorderBufferGetOldestXmin(), ReorderBufferSetBaseSnapshot(), and ReorderBufferTXNByXid().

◆ file_sort_by_lsn()

static int file_sort_by_lsn ( const ListCell *  a_p,
const ListCell *  b_p 
)
static

Definition at line 5466 of file reorderbuffer.c.

5467{
5470
5471 return pg_cmp_u64(a->lsn, b->lsn);
5472}

References a, b, fb(), lfirst, and pg_cmp_u64().

Referenced by UpdateLogicalMappings().
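
A hedged sketch of the sort-then-apply pattern the caller uses: collect the RewriteMappingFile entries, order them by the LSN at which they were written, and apply each file in turn. This presumes reorderbuffer.c's own includes and static declarations (RewriteMappingFile, ApplyLogicalMappingFile); it is not the actual UpdateLogicalMappings() body.

/* Sketch only; assumes the static declarations earlier in reorderbuffer.c. */
static void
apply_sorted_mappings(List *files, HTAB *tuplecid_data, Oid relid)
{
	ListCell   *lc;

	/* apply mapping files in the order they were created (by LSN) */
	list_sort(files, file_sort_by_lsn);

	foreach(lc, files)
	{
		RewriteMappingFile *f = (RewriteMappingFile *) lfirst(lc);

		ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
	}
}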

◆ ReorderBufferAbort()

void ReorderBufferAbort ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn,
TimestampTz  abort_time 
)

Definition at line 3087 of file reorderbuffer.c.

3089{
3090 ReorderBufferTXN *txn;
3091
3092 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3093 false);
3094
3095 /* unknown, nothing to remove */
3096 if (txn == NULL)
3097 return;
3098
3099 txn->abort_time = abort_time;
3100
3101 /* For streamed transactions notify the remote node about the abort. */
3102 if (rbtxn_is_streamed(txn))
3103 {
3104 rb->stream_abort(rb, txn, lsn);
3105
3106 /*
 3107 * We might have decoded changes for this transaction that could load
 3108 * the cache as per the current transaction's view (consider DDLs that
 3109 * happened in this transaction). We don't want the decoding of future
 3110 * transactions to use those cache entries, so execute only the inval
 3111 * messages in this transaction.
3112 */
3113 if (txn->ninvalidations > 0)
3115 txn->invalidations);
3116 }
3117
3118 /* cosmetic... */
3119 txn->final_lsn = lsn;
3120
3121 /* remove potential on-disk data, and deallocate */
3123}

References ReorderBufferTXN::abort_time, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeAbort().
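
A hedged sketch of a DecodeAbort-style caller. The parameter names are hypothetical; the real decoder takes these values from the parsed abort record and also aborts each known subtransaction before the top-level one.

#include "postgres.h"
#include "replication/reorderbuffer.h"

/* Sketch only; not the actual DecodeAbort() implementation. */
static void
abort_decoded_xact(ReorderBuffer *rb, TransactionId xid,
				   TransactionId *subxacts, int nsubxacts,
				   XLogRecPtr lsn, TimestampTz abort_time)
{
	int			i;

	/* discard any changes queued for the subtransactions first */
	for (i = 0; i < nsubxacts; i++)
		ReorderBufferAbort(rb, subxacts[i], lsn, abort_time);

	/* then drop the top-level transaction itself */
	ReorderBufferAbort(rb, xid, lsn, abort_time);
}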

◆ ReorderBufferAbortOld()

void ReorderBufferAbortOld ( ReorderBuffer *  rb,
TransactionId  oldestRunningXid 
)

Definition at line 3133 of file reorderbuffer.c.

3134{
3136
3137 /*
 3138 * Iterate through all (potential) toplevel TXNs and abort all that are
 3139 * older than what possibly can be running. Once we've found the first
 3140 * one that is alive we stop; there might be some that acquired an xid
 3141 * earlier but started writing later, but that's unlikely and they will
 3142 * be cleaned up in a later call to this function.
3143 */
3144 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3145 {
3146 ReorderBufferTXN *txn;
3147
3148 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3149
3150 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3151 {
3152 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3153
3154 /* Notify the remote node about the crash/immediate restart. */
3155 if (rbtxn_is_streamed(txn))
3156 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3157
3158 /* remove potential on-disk data, and deallocate this tx */
3160 }
3161 else
3162 return;
3163 }
3164}

References DEBUG2, dlist_container, dlist_foreach_modify, elog, fb(), InvalidXLogRecPtr, rbtxn_is_streamed, ReorderBufferCleanupTXN(), TransactionIdPrecedes(), and ReorderBufferTXN::xid.

Referenced by standby_decode().

◆ ReorderBufferAccumulateInvalidations()

static void ReorderBufferAccumulateInvalidations ( SharedInvalidationMessage **  invals_out,
uint32 *  ninvals_out,
SharedInvalidationMessage *  msgs_new,
Size  nmsgs_new 
)
static

Definition at line 3505 of file reorderbuffer.c.

3509{
3510 if (*ninvals_out == 0)
3511 {
3515 }
3516 else
3517 {
3518 /* Enlarge the array of inval messages */
3521 (*ninvals_out + nmsgs_new));
3525 }
3526}

References fb(), palloc_array, and repalloc().

Referenced by ReorderBufferAddDistributedInvalidations(), and ReorderBufferAddInvalidations().
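
Most of this helper's body is elided in the excerpt above; per the References list it boils down to an allocate-or-grow-and-append pattern. A compact sketch of that pattern (an approximation with the same parameter names, not the verbatim function body):

#include "postgres.h"
#include "storage/sinval.h"

static void
accumulate_invals(SharedInvalidationMessage **invals_out, uint32 *ninvals_out,
				  SharedInvalidationMessage *msgs_new, Size nmsgs_new)
{
	if (*ninvals_out == 0)
	{
		/* first batch: allocate a fresh array and copy the messages in */
		*invals_out = palloc_array(SharedInvalidationMessage, nmsgs_new);
		memcpy(*invals_out, msgs_new,
			   sizeof(SharedInvalidationMessage) * nmsgs_new);
	}
	else
	{
		/* later batches: enlarge the existing array and append */
		*invals_out = repalloc_array(*invals_out, SharedInvalidationMessage,
									 *ninvals_out + nmsgs_new);
		memcpy(*invals_out + *ninvals_out, msgs_new,
			   sizeof(SharedInvalidationMessage) * nmsgs_new);
	}

	*ninvals_out += nmsgs_new;
}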

◆ ReorderBufferAddDistributedInvalidations()

void ReorderBufferAddDistributedInvalidations ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage *  msgs 
)

Definition at line 3583 of file reorderbuffer.c.

3586{
3587 ReorderBufferTXN *txn;
3588 MemoryContext oldcontext;
3589
3590 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3591
3592 oldcontext = MemoryContextSwitchTo(rb->context);
3593
3594 /*
3595 * Collect all the invalidations under the top transaction, if available,
3596 * so that we can execute them all together. See comments
3597 * ReorderBufferAddInvalidations.
3598 */
3599 txn = rbtxn_get_toptxn(txn);
3600
3601 Assert(nmsgs > 0);
3602
3604 {
3605 /*
3606 * Check the transaction has enough space for storing distributed
3607 * invalidation messages.
3608 */
3610 {
3611 /*
3612 * Mark the invalidation message as overflowed and free up the
3613 * messages accumulated so far.
3614 */
3616
3618 {
3622 }
3623 }
3624 else
3627 msgs, nmsgs);
3628 }
3629
3630 /* Queue the invalidation messages into the transaction */
3631 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3632
3633 MemoryContextSwitchTo(oldcontext);
3634}

References Assert, fb(), ReorderBufferTXN::invalidations_distributed, MAX_DISTR_INVAL_MSG_PER_TXN, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations_distributed, pfree(), RBTXN_DISTR_INVAL_OVERFLOWED, rbtxn_distr_inval_overflowed, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferAddInvalidations()

void ReorderBufferAddInvalidations ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage *  msgs 
)

Definition at line 3542 of file reorderbuffer.c.

3545{
3546 ReorderBufferTXN *txn;
3547 MemoryContext oldcontext;
3548
3549 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3550
3551 oldcontext = MemoryContextSwitchTo(rb->context);
3552
3553 /*
3554 * Collect all the invalidations under the top transaction, if available,
3555 * so that we can execute them all together. See comments atop this
3556 * function.
3557 */
3558 txn = rbtxn_get_toptxn(txn);
3559
3560 Assert(nmsgs > 0);
3561
3563 &txn->ninvalidations,
3564 msgs, nmsgs);
3565
3566 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3567
3568 MemoryContextSwitchTo(oldcontext);
3569}

References Assert, fb(), ReorderBufferTXN::invalidations, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), and ReorderBufferTXNByXid().

Referenced by xact_decode().
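
A hedged sketch of the calling pattern from the decode layer: invalidation messages parsed out of a WAL record are handed to the reorder buffer, which collects them under the top-level transaction and queues an invalidation change at the given LSN. The names are hypothetical; this is not the actual xact_decode() code.

#include "postgres.h"
#include "replication/reorderbuffer.h"
#include "storage/sinval.h"

static void
queue_xact_invalidations(ReorderBuffer *rb, TransactionId xid,
						 XLogRecPtr lsn, Size nmsgs,
						 SharedInvalidationMessage *msgs)
{
	/* the function asserts nmsgs > 0, so only call it when there is work */
	if (nmsgs > 0)
		ReorderBufferAddInvalidations(rb, xid, lsn, nmsgs, msgs);
}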

◆ ReorderBufferAddNewCommandId()

◆ ReorderBufferAddNewTupleCids()

◆ ReorderBufferAddSnapshot()

◆ ReorderBufferAllocate()

ReorderBuffer * ReorderBufferAllocate ( void  )

Definition at line 324 of file reorderbuffer.c.

325{
326 ReorderBuffer *buffer;
329
331
332 /* allocate memory in own context, to have better accountability */
334 "ReorderBuffer",
336
337 buffer =
339
340 memset(&hash_ctl, 0, sizeof(hash_ctl));
341
342 buffer->context = new_ctx;
343
345 "Change",
347 sizeof(ReorderBufferChange));
348
350 "TXN",
352 sizeof(ReorderBufferTXN));
353
354 /*
 355 * To minimize memory fragmentation caused by long-running transactions
 356 * with changes spanning multiple memory blocks, we use a single
 357 * fixed-size memory block for decoded tuple storage. Performance
 358 * testing showed that the default memory block size maintains logical
 359 * decoding performance without causing fragmentation due to concurrent
 360 * transactions. One might think that we could use SLAB_LARGE_BLOCK_SIZE
 361 * as the block size, but the test also showed it doesn't help resolve
 362 * the memory fragmentation.
363 */
365 "Tuples",
369
370 hash_ctl.keysize = sizeof(TransactionId);
371 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
372 hash_ctl.hcxt = buffer->context;
373
374 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
376
378 buffer->by_txn_last_txn = NULL;
379
380 buffer->outbuf = NULL;
381 buffer->outbufsize = 0;
382 buffer->size = 0;
383
384 /* txn_heap is ordered by transaction size */
386
387 buffer->spillTxns = 0;
388 buffer->spillCount = 0;
389 buffer->spillBytes = 0;
390 buffer->streamTxns = 0;
391 buffer->streamCount = 0;
392 buffer->streamBytes = 0;
393 buffer->memExceededCount = 0;
394 buffer->totalTxns = 0;
395 buffer->totalBytes = 0;
396
398
399 dlist_init(&buffer->toplevel_by_lsn);
401 dclist_init(&buffer->catchange_txns);
402
403 /*
404 * Ensure there's no stale data from prior uses of this slot, in case some
405 * prior exit avoided calling ReorderBufferFree. Failure to do this can
406 * produce duplicated txns, and it's very cheap if there's nothing there.
407 */
409
410 return buffer;
411}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ReorderBuffer::by_txn, ReorderBuffer::by_txn_last_txn, ReorderBuffer::by_txn_last_xid, ReorderBuffer::catchange_txns, ReorderBuffer::change_context, ReorderBuffer::context, ReorderBuffer::current_restart_decoding_lsn, CurrentMemoryContext, ReplicationSlot::data, dclist_init(), dlist_init(), fb(), GenerationContextCreate(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, InvalidTransactionId, InvalidXLogRecPtr, ReorderBuffer::memExceededCount, MemoryContextAlloc(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, ReorderBuffer::outbuf, ReorderBuffer::outbufsize, pairingheap_allocate(), ReorderBufferCleanupSerializedTXNs(), ReorderBufferTXNSizeCompare(), ReorderBuffer::size, SLAB_DEFAULT_BLOCK_SIZE, SlabContextCreate(), ReorderBuffer::spillBytes, ReorderBuffer::spillCount, ReorderBuffer::spillTxns, ReorderBuffer::streamBytes, ReorderBuffer::streamCount, ReorderBuffer::streamTxns, ReorderBuffer::toplevel_by_lsn, ReorderBuffer::totalBytes, ReorderBuffer::totalTxns, ReorderBuffer::tup_context, ReorderBuffer::txn_context, ReorderBuffer::txn_heap, and ReorderBuffer::txns_by_base_snapshot_lsn.

Referenced by StartupDecodingContext().
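
A minimal lifecycle sketch. ReorderBufferAllocate() expects a replication slot to be acquired already (it cleans up serialized spill files under MyReplicationSlot's directory), and the caller is expected to fill in the output-plugin callbacks before decoding; this is an outline, not the StartupDecodingContext() code.

#include "postgres.h"
#include "replication/reorderbuffer.h"

static void
reorderbuffer_lifecycle(void)
{
	/* assumes MyReplicationSlot has already been acquired by the caller */
	ReorderBuffer *rb = ReorderBufferAllocate();

	/* ... set rb->apply_change, rb->commit, etc., then decode WAL ... */

	/* destroys the memory context and removes leftover spill files */
	ReorderBufferFree(rb);
}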

◆ ReorderBufferAllocChange()

◆ ReorderBufferAllocRelids()

Oid * ReorderBufferAllocRelids ( ReorderBuffer *  rb,
int  nrelids 
)

Definition at line 625 of file reorderbuffer.c.

626{
627 Oid *relids;
629
630 alloc_len = sizeof(Oid) * nrelids;
631
632 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
633
634 return relids;
635}

References fb(), and MemoryContextAlloc().

Referenced by DecodeTruncate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTupleBuf()

HeapTuple ReorderBufferAllocTupleBuf ( ReorderBuffer *  rb,
Size  tuple_len 
)

Definition at line 592 of file reorderbuffer.c.

593{
594 HeapTuple tuple;
596
597 alloc_len = tuple_len + SizeofHeapTupleHeader;
598
599 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
601 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
602
603 return tuple;
604}

References fb(), HEAPTUPLESIZE, MemoryContextAlloc(), SizeofHeapTupleHeader, and HeapTupleData::t_data.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeUpdate(), and ReorderBufferRestoreChange().
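
The returned buffer already has t_data pointing just past the HeapTupleData header, so a caller only needs to fill in the header fields and copy tuple_len bytes of tuple data. A hedged sketch (the helper name and the source tuple are hypothetical):

#include "postgres.h"
#include "access/htup_details.h"
#include "replication/reorderbuffer.h"

static HeapTuple
copy_tuple_into_rb(ReorderBuffer *rb, HeapTuple src)
{
	HeapTuple	dst = ReorderBufferAllocTupleBuf(rb, src->t_len);

	/* copy the header fields, then the tuple body into dst->t_data */
	dst->t_len = src->t_len;
	dst->t_self = src->t_self;
	dst->t_tableOid = src->t_tableOid;
	memcpy(dst->t_data, src->t_data, src->t_len);

	return dst;
}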

◆ ReorderBufferAllocTXN()

static ReorderBufferTXN * ReorderBufferAllocTXN ( ReorderBuffer *  rb)
static

Definition at line 435 of file reorderbuffer.c.

436{
437 ReorderBufferTXN *txn;
438
439 txn = (ReorderBufferTXN *)
440 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
441
442 memset(txn, 0, sizeof(ReorderBufferTXN));
443
444 dlist_init(&txn->changes);
445 dlist_init(&txn->tuplecids);
446 dlist_init(&txn->subtxns);
447
448 /* InvalidCommandId is not zero, so set it explicitly */
451
452 return txn;
453}

References ReorderBufferTXN::changes, ReorderBufferTXN::command_id, dlist_init(), fb(), InvalidCommandId, MemoryContextAlloc(), ReorderBufferTXN::output_plugin_private, ReorderBufferTXN::subtxns, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferTXNByXid().

◆ ReorderBufferApplyChange()

static void ReorderBufferApplyChange ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
Relation  relation,
ReorderBufferChange *  change,
bool  streaming 
)
inlinestatic

Definition at line 2072 of file reorderbuffer.c.

2075{
2076 if (streaming)
2077 rb->stream_change(rb, txn, relation, change);
2078 else
2079 rb->apply_change(rb, txn, relation, change);
2080}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyMessage()

static void ReorderBufferApplyMessage ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
ReorderBufferChange *  change,
bool  streaming 
)
inlinestatic

Definition at line 2100 of file reorderbuffer.c.

2102{
2103 if (streaming)
2104 rb->stream_message(rb, txn, change->lsn, true,
2105 change->data.msg.prefix,
2106 change->data.msg.message_size,
2107 change->data.msg.message);
2108 else
2109 rb->message(rb, txn, change->lsn, true,
2110 change->data.msg.prefix,
2111 change->data.msg.message_size,
2112 change->data.msg.message);
2113}

References ReorderBufferChange::data, fb(), ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, and ReorderBufferChange::prefix.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyTruncate()

static void ReorderBufferApplyTruncate ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
int  nrelations,
Relation *  relations,
ReorderBufferChange *  change,
bool  streaming 
)
inlinestatic

Definition at line 2086 of file reorderbuffer.c.

2089{
2090 if (streaming)
2091 rb->stream_truncate(rb, txn, nrelations, relations, change);
2092 else
2093 rb->apply_truncate(rb, txn, nrelations, relations, change);
2094}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferAssignChild()

void ReorderBufferAssignChild ( ReorderBuffer *  rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  lsn 
)

Definition at line 1099 of file reorderbuffer.c.

1101{
1102 ReorderBufferTXN *txn;
1104 bool new_top;
1105 bool new_sub;
1106
1107 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1108 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1109
1110 if (!new_sub)
1111 {
1113 {
1114 /* already associated, nothing to do */
1115 return;
1116 }
1117 else
1118 {
1119 /*
1120 * We already saw this transaction, but initially added it to the
1121 * list of top-level txns. Now that we know it's not top-level,
1122 * remove it from there.
1123 */
1124 dlist_delete(&subtxn->node);
1125 }
1126 }
1127
1128 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1129 subtxn->toplevel_xid = xid;
1130 Assert(subtxn->nsubtxns == 0);
1131
1132 /* set the reference to top-level transaction */
1133 subtxn->toptxn = txn;
1134
1135 /* add to subtransaction list */
1136 dlist_push_tail(&txn->subtxns, &subtxn->node);
1137 txn->nsubtxns++;
1138
1139 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1141
1142 /* Verify LSN-ordering invariant */
1144}

References Assert, AssertTXNLsnOrder(), dlist_delete(), dlist_push_tail(), fb(), ReorderBufferTXN::nsubtxns, rbtxn_is_known_subxact, RBTXN_IS_SUBXACT, ReorderBufferTransferSnapToParent(), ReorderBufferTXNByXid(), and ReorderBufferTXN::subtxns.

Referenced by LogicalDecodingProcessRecord(), and ReorderBufferCommitChild().
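
A hedged usage sketch: as soon as the relationship between subtransactions and their top-level transaction becomes known (e.g. from an assignment record), the decode layer reports it so the subxacts' changes are replayed as part of the right transaction. Repeating the call for an already-linked pair is a no-op. The helper and its subxact array are hypothetical.

#include "postgres.h"
#include "replication/reorderbuffer.h"

static void
assign_subxacts(ReorderBuffer *rb, TransactionId top_xid,
				int nsubxacts, TransactionId *subxacts, XLogRecPtr lsn)
{
	int			i;

	/* link each subtransaction to its top-level transaction */
	for (i = 0; i < nsubxacts; i++)
		ReorderBufferAssignChild(rb, top_xid, subxacts[i], lsn);
}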

◆ ReorderBufferBuildTupleCidHash()

static void ReorderBufferBuildTupleCidHash ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 1836 of file reorderbuffer.c.

1837{
1838 dlist_iter iter;
1840
1842 return;
1843
1845 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1846 hash_ctl.hcxt = rb->context;
1847
1848 /*
1849 * create the hash with the exact number of to-be-stored tuplecids from
1850 * the start
1851 */
1852 txn->tuplecid_hash =
1853 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1855
1856 dlist_foreach(iter, &txn->tuplecids)
1857 {
1860 bool found;
1861 ReorderBufferChange *change;
1862
1863 change = dlist_container(ReorderBufferChange, node, iter.cur);
1864
1866
1867 /* be careful about padding */
1868 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1869
1870 key.rlocator = change->data.tuplecid.locator;
1871
1873 &key.tid);
1874
1876 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1877 if (!found)
1878 {
1879 ent->cmin = change->data.tuplecid.cmin;
1880 ent->cmax = change->data.tuplecid.cmax;
1881 ent->combocid = change->data.tuplecid.combocid;
1882 }
1883 else
1884 {
1885 /*
1886 * Maybe we already saw this tuple before in this transaction, but
1887 * if so it must have the same cmin.
1888 */
1889 Assert(ent->cmin == change->data.tuplecid.cmin);
1890
1891 /*
1892 * cmax may be initially invalid, but once set it can only grow,
1893 * and never become invalid again.
1894 */
1895 Assert((ent->cmax == InvalidCommandId) ||
1896 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1897 (change->data.tuplecid.cmax > ent->cmax)));
1898 ent->cmax = change->data.tuplecid.cmax;
1899 }
1900 }
1901}

References ReorderBufferChange::action, Assert, ReorderBufferChange::cmax, ReorderBufferChange::cmin, ReorderBufferChange::combocid, dlist_iter::cur, ReorderBufferChange::data, dlist_container, dlist_foreach, dlist_is_empty(), fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), InvalidCommandId, ItemPointerCopy(), HASHCTL::keysize, ReorderBufferChange::locator, ReorderBufferTXN::ntuplecids, rbtxn_has_catalog_changes, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChange::tid, ReorderBufferChange::tuplecid, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferCanStartStreaming()

static bool ReorderBufferCanStartStreaming ( ReorderBuffer *  rb)
inlinestatic

Definition at line 4316 of file reorderbuffer.c.

4317{
4318 LogicalDecodingContext *ctx = rb->private_data;
4319 SnapBuild *builder = ctx->snapshot_builder;
4320
4321 /* We can't start streaming unless a consistent state is reached. */
4323 return false;
4324
4325 /*
 4326 * We can't start streaming immediately even if streaming is enabled,
 4327 * because we previously decoded this transaction and are now just
 4328 * restarting.
4329 */
4331 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4332 return true;
4333
4334 return false;
4335}

References fb(), LogicalDecodingContext::reader, XLogReaderState::ReadRecPtr, ReorderBufferCanStream(), SNAPBUILD_CONSISTENT, SnapBuildCurrentState(), SnapBuildXactNeedsSkip(), and LogicalDecodingContext::snapshot_builder.

Referenced by ReorderBufferCheckMemoryLimit(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferCanStream()

static bool ReorderBufferCanStream ( ReorderBuffer *  rb)
inlinestatic

Definition at line 4307 of file reorderbuffer.c.

4308{
4309 LogicalDecodingContext *ctx = rb->private_data;
4310
4311 return ctx->streaming;
4312}

References fb(), and LogicalDecodingContext::streaming.

Referenced by ReorderBufferCanStartStreaming(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferChangeMemoryUpdate()

static void ReorderBufferChangeMemoryUpdate ( ReorderBuffer *  rb,
ReorderBufferChange *  change,
ReorderBufferTXN *  txn,
bool  addition,
Size  sz 
)
static

Definition at line 3385 of file reorderbuffer.c.

3389{
3390 ReorderBufferTXN *toptxn;
3391
3392 Assert(txn || change);
3393
3394 /*
 3395 * Ignore tuple CID changes, because those are not evicted when reaching
 3396 * the memory limit. So we just don't count them, because counting them
 3397 * might easily trigger a pointless attempt to spill.
3398 */
3399 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3400 return;
3401
3402 if (sz == 0)
3403 return;
3404
3405 if (txn == NULL)
3406 txn = change->txn;
3407 Assert(txn != NULL);
3408
3409 /*
3410 * Update the total size in top level as well. This is later used to
3411 * compute the decoding stats.
3412 */
3413 toptxn = rbtxn_get_toptxn(txn);
3414
3415 if (addition)
3416 {
3417 Size oldsize = txn->size;
3418
3419 txn->size += sz;
3420 rb->size += sz;
3421
3422 /* Update the total size in the top transaction. */
3423 toptxn->total_size += sz;
3424
3425 /* Update the max-heap */
3426 if (oldsize != 0)
3427 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3428 pairingheap_add(rb->txn_heap, &txn->txn_node);
3429 }
3430 else
3431 {
3432 Assert((rb->size >= sz) && (txn->size >= sz));
3433 txn->size -= sz;
3434 rb->size -= sz;
3435
3436 /* Update the total size in the top transaction. */
3437 toptxn->total_size -= sz;
3438
3439 /* Update the max-heap */
3440 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3441 if (txn->size != 0)
3442 pairingheap_add(rb->txn_heap, &txn->txn_node);
3443 }
3444
3445 Assert(txn->size <= rb->size);
3446}

References ReorderBufferChange::action, Assert, fb(), pairingheap_add(), pairingheap_remove(), rbtxn_get_toptxn, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferTXN::size, ReorderBufferTXN::total_size, ReorderBufferChange::txn, and ReorderBufferTXN::txn_node.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferSerializeTXN(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferChangeSize()

static Size ReorderBufferChangeSize ( ReorderBufferChange *  change)
static

Definition at line 4459 of file reorderbuffer.c.

4460{
4461 Size sz = sizeof(ReorderBufferChange);
4462
4463 switch (change->action)
4464 {
4465 /* fall through these, they're all similar enough */
4470 {
4472 newtup;
4473 Size oldlen = 0;
4474 Size newlen = 0;
4475
4476 oldtup = change->data.tp.oldtuple;
4477 newtup = change->data.tp.newtuple;
4478
4479 if (oldtup)
4480 {
4481 sz += sizeof(HeapTupleData);
4482 oldlen = oldtup->t_len;
4483 sz += oldlen;
4484 }
4485
4486 if (newtup)
4487 {
4488 sz += sizeof(HeapTupleData);
4489 newlen = newtup->t_len;
4490 sz += newlen;
4491 }
4492
4493 break;
4494 }
4496 {
4497 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4498
4499 sz += prefix_size + change->data.msg.message_size +
4500 sizeof(Size) + sizeof(Size);
4501
4502 break;
4503 }
4505 {
4506 sz += sizeof(SharedInvalidationMessage) *
4507 change->data.inval.ninvalidations;
4508 break;
4509 }
4511 {
4512 Snapshot snap;
4513
4514 snap = change->data.snapshot;
4515
4516 sz += sizeof(SnapshotData) +
4517 sizeof(TransactionId) * snap->xcnt +
4518 sizeof(TransactionId) * snap->subxcnt;
4519
4520 break;
4521 }
4523 {
4524 sz += sizeof(Oid) * change->data.truncate.nrelids;
4525
4526 break;
4527 }
4532 /* ReorderBufferChange contains everything important */
4533 break;
4534 }
4535
4536 return sz;
4537}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().
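
For example, per the switch above, an INSERT change that carries only a new tuple is accounted as the change struct plus one HeapTupleData header plus the tuple body. A tiny illustration of that arithmetic (not part of reorderbuffer.c):

#include "postgres.h"
#include "access/htup.h"
#include "replication/reorderbuffer.h"

static Size
example_insert_change_size(Size newtup_len)
{
	/* sizeof(ReorderBufferChange) + sizeof(HeapTupleData) + tuple body */
	return sizeof(ReorderBufferChange) + sizeof(HeapTupleData) + newtup_len;
}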

◆ ReorderBufferCheckAndTruncateAbortedTXN()

static bool ReorderBufferCheckAndTruncateAbortedTXN ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 1774 of file reorderbuffer.c.

1775{
1776 /* Quick return for regression tests */
1778 return false;
1779
1780 /*
1781 * Quick return if the transaction status is already known.
1782 */
1783
1784 if (rbtxn_is_committed(txn))
1785 return false;
1786 if (rbtxn_is_aborted(txn))
1787 {
1788 /* Already-aborted transactions should not have any changes */
1789 Assert(txn->size == 0);
1790
1791 return true;
1792 }
1793
1794 /* Otherwise, check the transaction status using CLOG lookup */
1795
1797 return false;
1798
1799 if (TransactionIdDidCommit(txn->xid))
1800 {
1801 /*
 1802 * Remember the transaction is committed so that we can skip the CLOG
 1803 * check next time, avoiding pressure on CLOG lookups.
1804 */
1805 Assert(!rbtxn_is_aborted(txn));
1807 return false;
1808 }
1809
1810 /*
1811 * The transaction aborted. We discard both the changes collected so far
 1812 * and the toast reconstruction data. The full cleanup will happen as part
 1813 * of decoding the ABORT record of this transaction.
1814 */
1817
1818 /* All changes should be discarded */
1819 Assert(txn->size == 0);
1820
1821 /*
1822 * Mark the transaction as aborted so we can ignore future changes of this
1823 * transaction.
1824 */
1827
1828 return true;
1829}

References Assert, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), RBTXN_IS_ABORTED, rbtxn_is_aborted, RBTXN_IS_COMMITTED, rbtxn_is_committed, rbtxn_is_prepared, ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferTXN::size, TransactionIdDidCommit(), TransactionIdIsInProgress(), ReorderBufferTXN::txn_flags, unlikely, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferCheckMemoryLimit()

static void ReorderBufferCheckMemoryLimit ( ReorderBuffer *  rb)
static

Definition at line 3896 of file reorderbuffer.c.

3897{
3898 ReorderBufferTXN *txn;
3899 bool update_stats = true;
3900
3901 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3902 {
3903 /*
3904 * Update the statistics as the memory usage has reached the limit. We
3905 * report the statistics update later in this function since we can
3906 * update the slot statistics altogether while streaming or
3907 * serializing transactions in most cases.
3908 */
3909 rb->memExceededCount += 1;
3910 }
3912 {
3913 /*
3914 * Bail out if debug_logical_replication_streaming is buffered and we
3915 * haven't exceeded the memory limit.
3916 */
3917 return;
3918 }
3919
3920 /*
 3921 * If debug_logical_replication_streaming is immediate, loop until there's
 3922 * no change. Otherwise, loop until we get under the memory limit. One
 3923 * might think that just by evicting the largest (sub)transaction we will
 3924 * come under the memory limit, on the assumption that the selected
 3925 * transaction is at least as large as the most recent change (which
 3926 * caused us to go over the memory limit). However, that is not true
 3927 * because a user can reduce logical_decoding_work_mem to a smaller
 3928 * value before the most recent change.
3929 */
3930 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3932 rb->size > 0))
3933 {
3934 /*
3935 * Pick the largest non-aborted transaction and evict it from memory
3936 * by streaming, if possible. Otherwise, spill to disk.
3937 */
3940 {
3941 /* we know there has to be one, because the size is not zero */
3942 Assert(txn && rbtxn_is_toptxn(txn));
3943 Assert(txn->total_size > 0);
3944 Assert(rb->size >= txn->total_size);
3945
3946 /* skip the transaction if aborted */
3948 continue;
3949
3951 }
3952 else
3953 {
3954 /*
3955 * Pick the largest transaction (or subtransaction) and evict it
3956 * from memory by serializing it to disk.
3957 */
3959
3960 /* we know there has to be one, because the size is not zero */
3961 Assert(txn);
3962 Assert(txn->size > 0);
3963 Assert(rb->size >= txn->size);
3964
3965 /* skip the transaction if aborted */
3967 continue;
3968
3970 }
3971
3972 /*
3973 * After eviction, the transaction should have no entries in memory,
3974 * and should use 0 bytes for changes.
3975 */
3976 Assert(txn->size == 0);
3977 Assert(txn->nentries_mem == 0);
3978
3979 /*
3980 * We've reported the memExceededCount update while streaming or
3981 * serializing the transaction.
3982 */
3983 update_stats = false;
3984 }
3985
3986 if (update_stats)
3988
3989 /* We must be under the memory limit now. */
3990 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3991}

References Assert, DEBUG_LOGICAL_REP_STREAMING_BUFFERED, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), logical_decoding_work_mem, ReorderBufferTXN::nentries_mem, rbtxn_is_toptxn, ReorderBufferCanStartStreaming(), ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferLargestStreamableTopTXN(), ReorderBufferLargestTXN(), ReorderBufferSerializeTXN(), ReorderBufferStreamTXN(), ReorderBufferTXN::size, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferQueueChange().
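
The trigger condition is simply the buffer's total accounted size reaching logical_decoding_work_mem, a GUC expressed in kilobytes. A sketch of just that check (the eviction itself then streams or serializes the largest transaction, as shown above; the helper name is hypothetical):

#include "postgres.h"
#include "replication/reorderbuffer.h"

static bool
exceeds_decoding_work_mem(ReorderBuffer *rb)
{
	/* logical_decoding_work_mem is in kB; rb->size is in bytes */
	return rb->size >= logical_decoding_work_mem * (Size) 1024;
}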

◆ ReorderBufferCleanupSerializedTXNs()

static void ReorderBufferCleanupSerializedTXNs ( const char *  slotname)
static

Definition at line 4884 of file reorderbuffer.c.

4885{
4886 DIR *spill_dir;
4887 struct dirent *spill_de;
4888 struct stat statbuf;
4889 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4890
4891 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4892
4893 /* we're only handling directories here, skip if it's not ours */
4894 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4895 return;
4896
4897 spill_dir = AllocateDir(path);
4898 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4899 {
4900 /* only look at names that can be ours */
4901 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4902 {
4903 snprintf(path, sizeof(path),
4904 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4905 spill_de->d_name);
4906
4907 if (unlink(path) != 0)
4908 ereport(ERROR,
4910 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4911 path, PG_REPLSLOT_DIR, slotname)));
4912 }
4913 }
4915}

References AllocateDir(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), FreeDir(), INFO, lstat, MAXPGPATH, PG_REPLSLOT_DIR, ReadDirExtended(), S_ISDIR, snprintf, and sprintf.

Referenced by ReorderBufferAllocate(), ReorderBufferFree(), and StartupReorderBuffer().

◆ ReorderBufferCleanupTXN()

static void ReorderBufferCleanupTXN ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 1535 of file reorderbuffer.c.

1536{
1537 bool found;
1538 dlist_mutable_iter iter;
1539 Size mem_freed = 0;
1540
1541 /* cleanup subtransactions & their changes */
1542 dlist_foreach_modify(iter, &txn->subtxns)
1543 {
1545
1547
1548 /*
 1549 * Subtransactions are always associated with the toplevel TXN, even if
 1550 * they originally happened inside another subtxn, so we won't ever
 1551 * recurse more than one level deep here.
1552 */
1554 Assert(subtxn->nsubtxns == 0);
1555
1557 }
1558
1559 /* cleanup changes in the txn */
1560 dlist_foreach_modify(iter, &txn->changes)
1561 {
1562 ReorderBufferChange *change;
1563
1564 change = dlist_container(ReorderBufferChange, node, iter.cur);
1565
1566 /* Check we're not mixing changes from different transactions. */
1567 Assert(change->txn == txn);
1568
1569 /*
1570 * Instead of updating the memory counter for individual changes, we
1571 * sum up the size of memory to free so we can update the memory
1572 * counter all together below. This saves costs of maintaining the
1573 * max-heap.
1574 */
1576
1577 ReorderBufferFreeChange(rb, change, false);
1578 }
1579
1580 /* Update the memory counter */
1582
1583 /*
1584 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1585 * They are always stored in the toplevel transaction.
1586 */
1587 dlist_foreach_modify(iter, &txn->tuplecids)
1588 {
1589 ReorderBufferChange *change;
1590
1591 change = dlist_container(ReorderBufferChange, node, iter.cur);
1592
1593 /* Check we're not mixing changes from different transactions. */
1594 Assert(change->txn == txn);
1596
1597 ReorderBufferFreeChange(rb, change, true);
1598 }
1599
1600 /*
1601 * Cleanup the base snapshot, if set.
1602 */
1603 if (txn->base_snapshot != NULL)
1604 {
1607 }
1608
1609 /*
1610 * Cleanup the snapshot for the last streamed run.
1611 */
1612 if (txn->snapshot_now != NULL)
1613 {
1616 }
1617
1618 /*
1619 * Remove TXN from its containing lists.
1620 *
1621 * Note: if txn is known as subxact, we are deleting the TXN from its
1622 * parent's list of known subxacts; this leaves the parent's nsubxacts
1623 * count too high, but we don't care. Otherwise, we are deleting the TXN
1624 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1625 * list of catalog modifying transactions as well.
1626 */
1627 dlist_delete(&txn->node);
1629 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1630
1631 /* now remove reference from buffer */
1632 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1633 Assert(found);
1634
1635 /* remove entries spilled to disk */
1636 if (rbtxn_is_serialized(txn))
1638
1639 /* deallocate */
1641}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_node, ReorderBufferTXN::catchange_node, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dclist_delete_from(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search(), ReorderBufferTXN::node, rbtxn_has_catalog_changes, rbtxn_is_known_subxact, rbtxn_is_serialized, rbtxn_is_streamed, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferFreeTXN(), ReorderBufferRestoreCleanup(), SnapBuildSnapDecRefcount(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::xid.

Referenced by ReorderBufferAbort(), ReorderBufferAbortOld(), ReorderBufferCleanupTXN(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferProcessTXN(), ReorderBufferReplay(), and ReorderBufferStreamCommit().

◆ ReorderBufferCommit()

void ReorderBufferCommit ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
RepOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2884 of file reorderbuffer.c.

2888{
2889 ReorderBufferTXN *txn;
2890
2891 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2892 false);
2893
2894 /* unknown transaction, nothing to replay */
2895 if (txn == NULL)
2896 return;
2897
2898 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2899 origin_id, origin_lsn);
2900}

References fb(), InvalidXLogRecPtr, ReorderBufferReplay(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().
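
A hedged sketch of a DecodeCommit-style caller: the parsed commit record supplies the subtransaction list and the commit metadata, the subtransactions are folded into the top-level TXN first, and the whole transaction is then replayed through the output plugin. This is not the actual DecodeCommit() code.

#include "postgres.h"
#include "replication/reorderbuffer.h"

static void
replay_committed_xact(ReorderBuffer *rb, TransactionId xid,
					  TransactionId *subxacts, int nsubxacts,
					  XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
					  TimestampTz commit_time, RepOriginId origin_id,
					  XLogRecPtr origin_lsn)
{
	int			i;

	/* attach each subtransaction's changes to the top-level transaction */
	for (i = 0; i < nsubxacts; i++)
		ReorderBufferCommitChild(rb, xid, subxacts[i], commit_lsn, end_lsn);

	/* replay the assembled transaction through the output plugin */
	ReorderBufferCommit(rb, xid, commit_lsn, end_lsn, commit_time,
						origin_id, origin_lsn);
}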

◆ ReorderBufferCommitChild()

void ReorderBufferCommitChild ( ReorderBuffer *  rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn 
)

Definition at line 1219 of file reorderbuffer.c.

1222{
1224
1225 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1226 InvalidXLogRecPtr, false);
1227
1228 /*
1229 * No need to do anything if that subtxn didn't contain any changes
1230 */
1231 if (!subtxn)
1232 return;
1233
1234 subtxn->final_lsn = commit_lsn;
1235 subtxn->end_lsn = end_lsn;
1236
1237 /*
1238 * Assign this subxact as a child of the toplevel xact (no-op if already
1239 * done.)
1240 */
1242}

References fb(), InvalidXLogRecPtr, ReorderBufferAssignChild(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit(), and DecodePrepare().

◆ ReorderBufferCopySnap()

static Snapshot ReorderBufferCopySnap ( ReorderBuffer *  rb,
Snapshot  orig_snap,
ReorderBufferTXN *  txn,
CommandId  cid 
)
static

Definition at line 1909 of file reorderbuffer.c.

1911{
1912 Snapshot snap;
1913 dlist_iter iter;
1914 int i = 0;
1915 Size size;
1916
1917 size = sizeof(SnapshotData) +
1918 sizeof(TransactionId) * orig_snap->xcnt +
1919 sizeof(TransactionId) * (txn->nsubtxns + 1);
1920
1921 snap = MemoryContextAllocZero(rb->context, size);
1922 memcpy(snap, orig_snap, sizeof(SnapshotData));
1923
1924 snap->copied = true;
1925 snap->active_count = 1; /* mark as active so nobody frees it */
1926 snap->regd_count = 0;
1927 snap->xip = (TransactionId *) (snap + 1);
1928
1929 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1930
1931 /*
1932 * snap->subxip contains all txids that belong to our transaction which we
1933 * need to check via cmin/cmax. That's why we store the toplevel
1934 * transaction in there as well.
1935 */
1936 snap->subxip = snap->xip + snap->xcnt;
1937 snap->subxip[i++] = txn->xid;
1938
1939 /*
1940 * txn->nsubtxns isn't decreased when subtransactions abort, so count
 1941 * manually. Since it's an upper bound it is safe to use it for the
1942 * allocation above.
1943 */
1944 snap->subxcnt = 1;
1945
1946 dlist_foreach(iter, &txn->subtxns)
1947 {
1949
1951 snap->subxip[i++] = sub_txn->xid;
1952 snap->subxcnt++;
1953 }
1954
1955 /* sort so we can bsearch() later */
1956 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1957
1958 /* store the specified current CommandId */
1959 snap->curcid = cid;
1960
1961 return snap;
1962}

References dlist_iter::cur, dlist_container, dlist_foreach, fb(), i, MemoryContextAllocZero(), ReorderBufferTXN::nsubtxns, qsort, ReorderBufferTXN::subtxns, ReorderBufferTXN::xid, and xidComparator().

Referenced by ReorderBufferProcessTXN(), ReorderBufferSaveTXNSnapshot(), and ReorderBufferStreamTXN().

◆ ReorderBufferExecuteInvalidations()

static void ReorderBufferExecuteInvalidations ( uint32  nmsgs,
SharedInvalidationMessage *  msgs 
)
static

Definition at line 3641 of file reorderbuffer.c.

3642{
3643 int i;
3644
3645 for (i = 0; i < nmsgs; i++)
3647}

References i, and LocalExecuteInvalidationMessage().

Referenced by ReorderBufferFinishPrepared(), and ReorderBufferProcessTXN().

◆ ReorderBufferFinishPrepared()

void ReorderBufferFinishPrepared ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
XLogRecPtr  two_phase_at,
TimestampTz  commit_time,
RepOriginId  origin_id,
XLogRecPtr  origin_lsn,
char *  gid,
bool  is_commit 
)

Definition at line 3001 of file reorderbuffer.c.

3006{
3007 ReorderBufferTXN *txn;
3008 XLogRecPtr prepare_end_lsn;
3009 TimestampTz prepare_time;
3010
3011 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3012
3013 /* unknown transaction, nothing to do */
3014 if (txn == NULL)
3015 return;
3016
3017 /*
3018 * By this time the txn has the prepare record information, remember it to
3019 * be later used for rollback.
3020 */
3021 prepare_end_lsn = txn->end_lsn;
3022 prepare_time = txn->prepare_time;
3023
3024 /* add the gid in the txn */
3025 txn->gid = pstrdup(gid);
3026
3027 /*
3028 * It is possible that this transaction is not decoded at prepare time
3029 * either because by that time we didn't have a consistent snapshot, or
3030 * two_phase was not enabled, or it was decoded earlier but we have
3031 * restarted. We only need to send the prepare if it was not decoded
 3032 * earlier. We don't need to decode the xact for aborts if it was not
 3033 * decoded already.
3034 */
3035 if ((txn->final_lsn < two_phase_at) && is_commit)
3036 {
3037 /*
3038 * txn must have been marked as a prepared transaction and skipped but
3039 * not sent a prepare. Also, the prepare info must have been updated
3040 * in txn even if we skip prepare.
3041 */
3045
3046 /*
3047 * By this time the txn has the prepare record information and it is
3048 * important to use that so that downstream gets the accurate
3049 * information. If instead, we have passed commit information here
3050 * then downstream can behave as it has already replayed commit
3051 * prepared after the restart.
3052 */
3053 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3054 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3055 }
3056
3057 txn->final_lsn = commit_lsn;
3058 txn->end_lsn = end_lsn;
3059 txn->commit_time = commit_time;
3060 txn->origin_id = origin_id;
3061 txn->origin_lsn = origin_lsn;
3062
3063 if (is_commit)
3064 rb->commit_prepared(rb, txn, commit_lsn);
3065 else
3066 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3067
3068 /* cleanup: make sure there's no cache pollution */
3070 txn->invalidations);
3072}

References Assert, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, ReorderBufferTXN::invalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferCleanupTXN(), ReorderBufferExecuteInvalidations(), ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeAbort(), and DecodeCommit().
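
A hedged usage sketch for resolving a transaction that was (or should have been) decoded at PREPARE: is_commit selects between the commit_prepared and rollback_prepared output-plugin callbacks, and two_phase_at lets the function detect a prepare that was skipped earlier and replay it first. The parameter values come from the parsed COMMIT PREPARED / ROLLBACK PREPARED record; this is not the actual DecodeCommit()/DecodeAbort() code.

#include "postgres.h"
#include "replication/reorderbuffer.h"

static void
resolve_prepared_xact(ReorderBuffer *rb, TransactionId xid,
					  XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
					  XLogRecPtr two_phase_at, TimestampTz commit_time,
					  RepOriginId origin_id, XLogRecPtr origin_lsn,
					  char *gid, bool is_commit)
{
	ReorderBufferFinishPrepared(rb, xid, commit_lsn, end_lsn, two_phase_at,
								commit_time, origin_id, origin_lsn,
								gid, is_commit);
}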

◆ ReorderBufferForget()

void ReorderBufferForget ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3180 of file reorderbuffer.c.

3181{
3182 ReorderBufferTXN *txn;
3183
3184 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3185 false);
3186
3187 /* unknown, nothing to forget */
3188 if (txn == NULL)
3189 return;
3190
3191 /* this transaction mustn't be streamed */
3193
3194 /* cosmetic... */
3195 txn->final_lsn = lsn;
3196
3197 /*
3198 * Process only cache invalidation messages in this transaction if there
3199 * are any. Even if we're not interested in the transaction's contents, it
3200 * could have manipulated the catalog and we need to update the caches
3201 * according to that.
3202 */
3203 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3205 txn->invalidations);
3206 else
3207 Assert(txn->ninvalidations == 0);
3208
3209 /* remove potential on-disk data, and deallocate */
3211}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferFree()

void ReorderBufferFree ( ReorderBuffer *  rb)

Definition at line 417 of file reorderbuffer.c.

418{
419 MemoryContext context = rb->context;
420
421 /*
422 * We free separately allocated data by entirely scrapping reorderbuffer's
423 * memory context.
424 */
425 MemoryContextDelete(context);
426
427 /* Free disk space used by unconsumed reorder buffers */
429}

References ReplicationSlot::data, fb(), MemoryContextDelete(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, and ReorderBufferCleanupSerializedTXNs().

Referenced by FreeDecodingContext().

◆ ReorderBufferFreeChange()

void ReorderBufferFreeChange ( ReorderBuffer *  rb,
ReorderBufferChange *  change,
bool  upd_mem 
)

Definition at line 522 of file reorderbuffer.c.

524{
525 /* update memory accounting info */
526 if (upd_mem)
529
530 /* free contained data */
531 switch (change->action)
532 {
537 if (change->data.tp.newtuple)
538 {
540 change->data.tp.newtuple = NULL;
541 }
542
543 if (change->data.tp.oldtuple)
544 {
546 change->data.tp.oldtuple = NULL;
547 }
548 break;
550 if (change->data.msg.prefix != NULL)
551 pfree(change->data.msg.prefix);
552 change->data.msg.prefix = NULL;
553 if (change->data.msg.message != NULL)
554 pfree(change->data.msg.message);
555 change->data.msg.message = NULL;
556 break;
558 if (change->data.inval.invalidations)
559 pfree(change->data.inval.invalidations);
560 change->data.inval.invalidations = NULL;
561 break;
563 if (change->data.snapshot)
564 {
566 change->data.snapshot = NULL;
567 }
568 break;
569 /* no data in addition to the struct itself */
571 if (change->data.truncate.relids != NULL)
572 {
574 change->data.truncate.relids = NULL;
575 }
576 break;
581 break;
582 }
583
584 pfree(change);
585}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::message, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::oldtuple, pfree(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeRelids(), ReorderBufferFreeSnap(), ReorderBufferFreeTupleBuf(), ReorderBufferChange::snapshot, ReorderBufferChange::tp, and ReorderBufferChange::truncate.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNNext(), ReorderBufferProcessTXN(), ReorderBufferQueueChange(), ReorderBufferResetTXN(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), ReorderBufferToastReset(), and ReorderBufferTruncateTXN().

◆ ReorderBufferFreeRelids()

void ReorderBufferFreeRelids ( ReorderBuffer *  rb,
Oid *  relids 
)

Definition at line 641 of file reorderbuffer.c.

642{
643 pfree(relids);
644}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeSnap()

static void ReorderBufferFreeSnap ( ReorderBuffer *  rb,
Snapshot  snap 
)
static

Definition at line 1968 of file reorderbuffer.c.

1969{
1970 if (snap->copied)
1971 pfree(snap);
1972 else
1974}

References fb(), pfree(), and SnapBuildSnapDecRefcount().

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferProcessTXN(), and ReorderBufferStreamTXN().

◆ ReorderBufferFreeTupleBuf()

void ReorderBufferFreeTupleBuf ( HeapTuple  tuple)

Definition at line 610 of file reorderbuffer.c.

611{
612 pfree(tuple);
613}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeTXN()

static void ReorderBufferFreeTXN ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 459 of file reorderbuffer.c.

460{
461 /* clean the lookup cache if we were cached (quite likely) */
462 if (rb->by_txn_last_xid == txn->xid)
463 {
464 rb->by_txn_last_xid = InvalidTransactionId;
465 rb->by_txn_last_txn = NULL;
466 }
467
468 /* free data that's contained */
469
470 if (txn->gid != NULL)
471 {
472 pfree(txn->gid);
473 txn->gid = NULL;
474 }
475
476 if (txn->tuplecid_hash != NULL)
477 {
479 txn->tuplecid_hash = NULL;
480 }
481
482 if (txn->invalidations)
483 {
484 pfree(txn->invalidations);
485 txn->invalidations = NULL;
486 }
487
489 {
492 }
493
494 /* Reset the toast hash */
496
497 /* All changes must be deallocated */
498 Assert(txn->size == 0);
499
500 pfree(txn);
501}

References Assert, fb(), ReorderBufferTXN::gid, hash_destroy(), ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidTransactionId, pfree(), ReorderBufferToastReset(), ReorderBufferTXN::size, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCleanupTXN().

◆ ReorderBufferGetCatalogChangesXacts()

TransactionId * ReorderBufferGetCatalogChangesXacts ( ReorderBuffer *  rb)

Definition at line 3691 of file reorderbuffer.c.

3692{
3693 dlist_iter iter;
3694 TransactionId *xids = NULL;
3695 size_t xcnt = 0;
3696
3697 /* Quick return if the list is empty */
3698 if (dclist_count(&rb->catchange_txns) == 0)
3699 return NULL;
3700
3701 /* Initialize XID array */
3702 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3703 dclist_foreach(iter, &rb->catchange_txns)
3704 {
3706 catchange_node,
3707 iter.cur);
3708
3710
3711 xids[xcnt++] = txn->xid;
3712 }
3713
3714 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3715
3716 Assert(xcnt == dclist_count(&rb->catchange_txns));
3717 return xids;
3718}

References Assert, dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, fb(), palloc_array, qsort, rbtxn_has_catalog_changes, ReorderBufferTXN::xid, and xidComparator().

Referenced by SnapBuildSerialize().

◆ ReorderBufferGetInvalidations()

uint32 ReorderBufferGetInvalidations ( ReorderBuffer *  rb,
TransactionId  xid,
SharedInvalidationMessage **  msgs 
)

Definition at line 5631 of file reorderbuffer.c.

5633{
5634 ReorderBufferTXN *txn;
5635
5636 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5637 false);
5638
5639 if (txn == NULL)
5640 return 0;
5641
5642 *msgs = txn->invalidations;
5643
5644 return txn->ninvalidations;
5645}

References fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, and ReorderBufferTXNByXid().

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferGetOldestTXN()

ReorderBufferTXN * ReorderBufferGetOldestTXN ( ReorderBuffer *  rb)

Definition at line 1044 of file reorderbuffer.c.

1045{
1046 ReorderBufferTXN *txn;
1047
1049
1050 if (dlist_is_empty(&rb->toplevel_by_lsn))
1051 return NULL;
1052
1053 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1054
1057 return txn;
1058}

References Assert, AssertTXNLsnOrder(), dlist_head_element, dlist_is_empty(), fb(), ReorderBufferTXN::first_lsn, rbtxn_is_known_subxact, and XLogRecPtrIsValid.

Referenced by SnapBuildProcessRunningXacts().
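
A hedged sketch of how the result is typically used: the oldest buffered transaction's restart_decoding_lsn caps how far the slot's restart point may advance, because WAL from that LSN onward is still needed to re-decode the buffered transaction after a restart. The helper name is hypothetical.

#include "postgres.h"
#include "replication/reorderbuffer.h"

static XLogRecPtr
clamp_restart_lsn(ReorderBuffer *rb, XLogRecPtr candidate)
{
	ReorderBufferTXN *txn = ReorderBufferGetOldestTXN(rb);

	/* don't advance past WAL still needed by the oldest buffered TXN */
	if (txn != NULL && txn->restart_decoding_lsn < candidate)
		return txn->restart_decoding_lsn;

	return candidate;
}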

◆ ReorderBufferGetOldestXmin()

TransactionId ReorderBufferGetOldestXmin ( ReorderBuffer *  rb)

Definition at line 1072 of file reorderbuffer.c.

1073{
1074 ReorderBufferTXN *txn;
1075
1077
1078 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1079 return InvalidTransactionId;
1080
1081 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1082 &rb->txns_by_base_snapshot_lsn);
1083 return txn->base_snapshot->xmin;
1084}

References AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, dlist_head_element, dlist_is_empty(), fb(), InvalidTransactionId, and SnapshotData::xmin.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferImmediateInvalidation()

void ReorderBufferImmediateInvalidation ( ReorderBuffer *  rb,
uint32  ninvalidations,
SharedInvalidationMessage *  invalidations 
)

Definition at line 3253 of file reorderbuffer.c.

3255{
3259 int i;
3260
3261 if (use_subtxn)
3263
3264 /*
3265 * Force invalidations to happen outside of a valid transaction - that way
3266 * entries will just be marked as invalid without accessing the catalog.
 3267 * That's advantageous because we don't need to set up the full state
3268 * necessary for catalog access.
3269 */
3270 if (use_subtxn)
3272
3273 for (i = 0; i < ninvalidations; i++)
3274 LocalExecuteInvalidationMessage(&invalidations[i]);
3275
3276 if (use_subtxn)
3277 {
3280 CurrentResourceOwner = cowner;
3281 }
3282}

References AbortCurrentTransaction(), BeginInternalSubTransaction(), CurrentMemoryContext, CurrentResourceOwner, fb(), i, IsTransactionOrTransactionBlock(), LocalExecuteInvalidationMessage(), MemoryContextSwitchTo(), and RollbackAndReleaseCurrentSubTransaction().

Referenced by ReorderBufferAbort(), ReorderBufferForget(), ReorderBufferInvalidate(), and xact_decode().

◆ ReorderBufferInvalidate()

void ReorderBufferInvalidate ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3222 of file reorderbuffer.c.

3223{
3224 ReorderBufferTXN *txn;
3225
3226 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3227 false);
3228
3229 /* unknown, nothing to do */
3230 if (txn == NULL)
3231 return;
3232
3233 /*
3234 * Process cache invalidation messages if there are any. Even if we're not
3235 * interested in the transaction's contents, it could have manipulated the
3236 * catalog and we need to update the caches according to that.
3237 */
3238 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3240 txn->invalidations);
3241 else
3242 Assert(txn->ninvalidations == 0);
3243}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodePrepare().

◆ ReorderBufferIterCompare()

static int ReorderBufferIterCompare ( Datum  a,
Datum  b,
void *  arg 
)
static

Definition at line 1261 of file reorderbuffer.c.

1262{
1264 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1265 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1266
1267 if (pos_a < pos_b)
1268 return 1;
1269 else if (pos_a == pos_b)
1270 return 0;
1271 return -1;
1272}

References a, arg, b, DatumGetInt32(), and fb().

Referenced by ReorderBufferIterTXNInit().
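
Note the inverted return values: the comparator reports the entry with the smaller LSN as the "larger" one, which turns the binaryheap (whose first element is the maximum under the comparator) into a min-heap ordered by LSN. The snippet below illustrates that trick outside the backend; lsn_inverted_cmp and pick_top are made-up names, and pick_top just scans for the element the comparator ranks highest, standing in for binaryheap_first.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t Lsn;

/* Smaller LSNs compare as "larger", inverting the heap's ordering. */
static int
lsn_inverted_cmp(const void *a, const void *b)
{
    Lsn     pa = *(const Lsn *) a;
    Lsn     pb = *(const Lsn *) b;

    if (pa < pb)
        return 1;
    if (pa == pb)
        return 0;
    return -1;
}

/* Stand-in for binaryheap_first(): index of the comparator's "largest" item. */
static size_t
pick_top(const Lsn *arr, size_t n)
{
    size_t  best = 0;
    size_t  i;

    for (i = 1; i < n; i++)
        if (lsn_inverted_cmp(&arr[i], &arr[best]) > 0)
            best = i;
    return best;
}

int
main(void)
{
    Lsn     lsns[] = {400, 100, 300, 200};

    /* prints 100: the smallest LSN wins, i.e. min-heap behaviour */
    printf("next LSN to emit: %llu\n",
           (unsigned long long) lsns[pick_top(lsns, 4)]);
    return 0;
}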

◆ ReorderBufferIterTXNFinish()

static void ReorderBufferIterTXNFinish ( ReorderBuffer *  rb,
ReorderBufferIterTXNState *  state 
)
static

Definition at line 1504 of file reorderbuffer.c.

1506{
1507 int32 off;
1508
1509 for (off = 0; off < state->nr_txns; off++)
1510 {
1511 if (state->entries[off].file.vfd != -1)
1512 FileClose(state->entries[off].file.vfd);
1513 }
1514
1515 /* free memory we might have "leaked" in the last *Next call */
1516 if (!dlist_is_empty(&state->old_change))
1517 {
1518 ReorderBufferChange *change;
1519
1520 change = dlist_container(ReorderBufferChange, node,
1521 dlist_pop_head_node(&state->old_change));
1522 ReorderBufferFreeChange(rb, change, true);
1523 Assert(dlist_is_empty(&state->old_change));
1524 }
1525
1526 binaryheap_free(state->heap);
1527 pfree(state);
1528}

References Assert, binaryheap_free(), dlist_container, dlist_is_empty(), dlist_pop_head_node(), fb(), FileClose(), pfree(), and ReorderBufferFreeChange().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNInit()

static void ReorderBufferIterTXNInit ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
ReorderBufferIterTXNState *volatile *  iter_state 
)
static

Definition at line 1284 of file reorderbuffer.c.

1286{
1287 Size nr_txns = 0;
1290 int32 off;
1291
1292 *iter_state = NULL;
1293
1294 /* Check ordering of changes in the toplevel transaction. */
1296
1297 /*
1298 * Calculate the size of our heap: one element for every transaction that
1299 * contains changes. (Besides the transactions already in the reorder
1300 * buffer, we count the one we were directly passed.)
1301 */
1302 if (txn->nentries > 0)
1303 nr_txns++;
1304
1306 {
1308
1310
1311 /* Check ordering of changes in this subtransaction. */
1313
1314 if (cur_txn->nentries > 0)
1315 nr_txns++;
1316 }
1317
1318 /* allocate iteration state */
1320 MemoryContextAllocZero(rb->context,
1322 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1323
1324 state->nr_txns = nr_txns;
1325 dlist_init(&state->old_change);
1326
1327 for (off = 0; off < state->nr_txns; off++)
1328 {
1329 state->entries[off].file.vfd = -1;
1330 state->entries[off].segno = 0;
1331 }
1332
1333 /* allocate heap */
1334 state->heap = binaryheap_allocate(state->nr_txns,
1336 state);
1337
1338 /* Now that the state fields are initialized, it is safe to return it. */
1339 *iter_state = state;
1340
1341 /*
1342 * Now insert items into the binary heap, in an unordered fashion. (We
1343 * will run a heap assembly step at the end; this is more efficient.)
1344 */
1345
1346 off = 0;
1347
1348 /* add toplevel transaction if it contains changes */
1349 if (txn->nentries > 0)
1350 {
1352
1353 if (rbtxn_is_serialized(txn))
1354 {
1355 /* serialize remaining changes */
1357 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1358 &state->entries[off].segno);
1359 }
1360
1362 &txn->changes);
1363
1364 state->entries[off].lsn = cur_change->lsn;
1365 state->entries[off].change = cur_change;
1366 state->entries[off].txn = txn;
1367
1369 }
1370
1371 /* add subtransactions if they contain changes */
1373 {
1375
1377
1378 if (cur_txn->nentries > 0)
1379 {
1381
1383 {
1384 /* serialize remaining changes */
1387 &state->entries[off].file,
1388 &state->entries[off].segno);
1389 }
1391 &cur_txn->changes);
1392
1393 state->entries[off].lsn = cur_change->lsn;
1394 state->entries[off].change = cur_change;
1395 state->entries[off].txn = cur_txn;
1396
1398 }
1399 }
1400
1401 /* assemble a valid binary heap */
1402 binaryheap_build(state->heap);
1403}

References AssertChangeLsnOrder(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), ReorderBufferTXN::changes, dlist_container, dlist_foreach, dlist_head_element, dlist_init(), fb(), Int32GetDatum(), MemoryContextAllocZero(), ReorderBufferTXN::nentries, rbtxn_is_serialized, ReorderBufferIterCompare(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), and ReorderBufferTXN::subtxns.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNNext()

static ReorderBufferChange * ReorderBufferIterTXNNext ( ReorderBuffer *  rb,
ReorderBufferIterTXNState *  state 
)
static

Definition at line 1412 of file reorderbuffer.c.

1413{
1414 ReorderBufferChange *change;
1416 int32 off;
1417
1418 /* nothing there anymore */
1419 if (binaryheap_empty(state->heap))
1420 return NULL;
1421
1422 off = DatumGetInt32(binaryheap_first(state->heap));
1423 entry = &state->entries[off];
1424
1425 /* free memory we might have "leaked" in the previous *Next call */
1426 if (!dlist_is_empty(&state->old_change))
1427 {
1428 change = dlist_container(ReorderBufferChange, node,
1429 dlist_pop_head_node(&state->old_change));
1430 ReorderBufferFreeChange(rb, change, true);
1431 Assert(dlist_is_empty(&state->old_change));
1432 }
1433
1434 change = entry->change;
1435
1436 /*
1437 * update heap with information about which transaction has the next
1438 * relevant change in LSN order
1439 */
1440
1441 /* there are in-memory changes */
1442 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1443 {
1444 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1447
1448 /* txn stays the same */
1449 state->entries[off].lsn = next_change->lsn;
1450 state->entries[off].change = next_change;
1451
1453 return change;
1454 }
1455
1456 /* try to load changes from disk */
1457 if (entry->txn->nentries != entry->txn->nentries_mem)
1458 {
1459 /*
1460 * Ugly: restoring changes will reuse *Change records, thus delete the
1461 * current one from the per-tx list and only free in the next call.
1462 */
1463 dlist_delete(&change->node);
1464 dlist_push_tail(&state->old_change, &change->node);
1465
1466 /*
1467 * Update the total bytes processed by the txn for which we are
1468 * releasing the current set of changes and restoring the new set of
1469 * changes.
1470 */
1471 rb->totalBytes += entry->txn->size;
1472 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1473 &state->entries[off].segno))
1474 {
1475 /* successfully restored changes from disk */
1478 &entry->txn->changes);
1479
1480 elog(DEBUG2, "restored %u/%u changes from disk",
1481 (uint32) entry->txn->nentries_mem,
1482 (uint32) entry->txn->nentries);
1483
1484 Assert(entry->txn->nentries_mem);
1485 /* txn stays the same */
1486 state->entries[off].lsn = next_change->lsn;
1487 state->entries[off].change = next_change;
1489
1490 return change;
1491 }
1492 }
1493
1494 /* ok, no changes there anymore, remove */
1496
1497 return change;
1498}

References Assert, binaryheap_empty, binaryheap_first(), binaryheap_remove_first(), binaryheap_replace_first(), ReorderBufferIterTXNEntry::change, ReorderBufferTXN::changes, DatumGetInt32(), DEBUG2, dlist_container, dlist_delete(), dlist_has_next(), dlist_head_element, dlist_is_empty(), dlist_next_node(), dlist_pop_head_node(), dlist_push_tail(), elog, fb(), ReorderBufferIterTXNEntry::file, Int32GetDatum(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, next, ReorderBufferChange::node, ReorderBufferFreeChange(), ReorderBufferRestoreChanges(), ReorderBufferTXN::size, and ReorderBufferIterTXNEntry::txn.

Referenced by ReorderBufferProcessTXN().
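
Each call returns the change with the globally smallest LSN, then either advances the owning transaction to its next in-memory change, refills its slot from spilled files, or drops the transaction from the heap once it is exhausted; that is a classic k-way merge. The sketch below shows the same pop-and-advance step over plain sorted arrays, with invented names, a linear scan instead of a binary heap, and no spill-file handling.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t Lsn;

/* One per-transaction stream of changes, already sorted by LSN. */
typedef struct ChangeStream
{
    const Lsn  *changes;
    size_t      nchanges;
    size_t      pos;
} ChangeStream;

/*
 * Return the smallest LSN across all streams and advance that stream,
 * or 0 when every stream is exhausted.  The real code keys a binary heap
 * by each stream's current LSN instead of rescanning.
 */
static Lsn
merge_next(ChangeStream *streams, size_t nstreams)
{
    size_t  best = nstreams;    /* sentinel: no live stream found yet */
    size_t  i;

    for (i = 0; i < nstreams; i++)
    {
        if (streams[i].pos >= streams[i].nchanges)
            continue;           /* exhausted, i.e. removed from the heap */
        if (best == nstreams ||
            streams[i].changes[streams[i].pos] <
            streams[best].changes[streams[best].pos])
            best = i;
    }

    if (best == nstreams)
        return 0;

    return streams[best].changes[streams[best].pos++];
}

int
main(void)
{
    const Lsn       a[] = {10, 40, 70};
    const Lsn       b[] = {20, 30};
    ChangeStream    streams[] = {{a, 3, 0}, {b, 2, 0}};
    Lsn             lsn;

    while ((lsn = merge_next(streams, 2)) != 0)
        printf("apply change at LSN %llu\n", (unsigned long long) lsn);
    return 0;
}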

◆ ReorderBufferLargestStreamableTopTXN()

static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN ( ReorderBuffer *  rb)
static

Definition at line 3846 of file reorderbuffer.c.

3847{
3848 dlist_iter iter;
3849 Size largest_size = 0;
3851
3852 /* Find the largest top-level transaction having a base snapshot. */
3853 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3854 {
3855 ReorderBufferTXN *txn;
3856
3857 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3858
3859 /* must not be a subtxn */
3861 /* base_snapshot must be set */
3862 Assert(txn->base_snapshot != NULL);
3863
3864 /* Don't consider these kinds of transactions for eviction. */
3865 if (rbtxn_has_partial_change(txn) ||
3867 rbtxn_is_aborted(txn))
3868 continue;
3869
3870 /* Find the largest of the eviction candidates. */
3871 if ((largest == NULL || txn->total_size > largest_size) &&
3872 (txn->total_size > 0))
3873 {
3874 largest = txn;
3875 largest_size = txn->total_size;
3876 }
3877 }
3878
3879 return largest;
3880}

References Assert, ReorderBufferTXN::base_snapshot, dlist_iter::cur, dlist_container, dlist_foreach, fb(), rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_aborted, rbtxn_is_known_subxact, and ReorderBufferTXN::total_size.

Referenced by ReorderBufferCheckMemoryLimit().
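
The scan considers only top-level transactions with a base snapshot and skips those that cannot be streamed yet (pending partial changes, nothing streamable, or already aborted), keeping the largest remaining candidate. A compact sketch of that filter-then-take-the-maximum selection over an array of hypothetical TxnInfo descriptors; the flag names only mirror the rbtxn_* macros, part of whose test is elided in the listing above.

#include <stdbool.h>
#include <stddef.h>

typedef struct TxnInfo
{
    size_t  total_size;
    bool    has_partial_change;
    bool    has_streamable_change;
    bool    is_aborted;
} TxnInfo;

/* Pick the largest transaction eligible for streaming, or NULL if none. */
static TxnInfo *
largest_streamable(TxnInfo *txns, size_t ntxns)
{
    TxnInfo    *largest = NULL;
    size_t      i;

    for (i = 0; i < ntxns; i++)
    {
        TxnInfo    *txn = &txns[i];

        /* skip candidates that cannot be streamed (yet) */
        if (txn->has_partial_change ||
            !txn->has_streamable_change ||
            txn->is_aborted)
            continue;

        if (txn->total_size > 0 &&
            (largest == NULL || txn->total_size > largest->total_size))
            largest = txn;
    }

    return largest;
}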

◆ ReorderBufferLargestTXN()

static ReorderBufferTXN * ReorderBufferLargestTXN ( ReorderBuffer *  rb)
static

Definition at line 3805 of file reorderbuffer.c.

3806{
3808
3809 /* Get the largest transaction from the max-heap */
3811 pairingheap_first(rb->txn_heap));
3812
3813 Assert(largest);
3814 Assert(largest->size > 0);
3815 Assert(largest->size <= rb->size);
3816
3817 return largest;
3818}

References Assert, fb(), pairingheap_container, and pairingheap_first().

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferMaybeMarkTXNStreamed()

static void ReorderBufferMaybeMarkTXNStreamed ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 2138 of file reorderbuffer.c.

2139{
2140 /*
 2141 * The top-level transaction is always marked as streamed, even if it
2142 * does not contain any changes (that is, when all the changes are in
2143 * subtransactions).
2144 *
2145 * For subtransactions, we only mark them as streamed when there are
2146 * changes in them.
2147 *
2148 * We do it this way because of aborts - we don't want to send aborts for
2149 * XIDs the downstream is not aware of. And of course, it always knows
2150 * about the top-level xact (we send the XID in all messages), but we
2151 * never stream XIDs of empty subxacts.
2152 */
2153 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2155}

References ReorderBufferTXN::nentries_mem, RBTXN_IS_STREAMED, rbtxn_is_toptxn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferPrepare()

void ReorderBufferPrepare ( ReorderBuffer *  rb,
TransactionId  xid,
char *  gid 
)

Definition at line 2960 of file reorderbuffer.c.

2962{
2963 ReorderBufferTXN *txn;
2964
2965 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2966 false);
2967
2968 /* unknown transaction, nothing to replay */
2969 if (txn == NULL)
2970 return;
2971
2972 /*
2973 * txn must have been marked as a prepared transaction and must have
2974 * neither been skipped nor sent a prepare. Also, the prepare info must
2975 * have been updated in it by now.
2976 */
2979
2980 txn->gid = pstrdup(gid);
2981
2982 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2983 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2984
2985 /*
2986 * Send a prepare if not already done so. This might occur if we have
2987 * detected a concurrent abort while replaying the non-streaming
2988 * transaction.
2989 */
2990 if (!rbtxn_sent_prepare(txn))
2991 {
2992 rb->prepare(rb, txn, txn->final_lsn);
2994 }
2995}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodePrepare().

◆ ReorderBufferProcessPartialChange()

static void ReorderBufferProcessPartialChange ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
ReorderBufferChange *  change,
bool  toast_insert 
)
static

Definition at line 741 of file reorderbuffer.c.

744{
745 ReorderBufferTXN *toptxn;
746
747 /*
748 * The partial changes need to be processed only while streaming
749 * in-progress transactions.
750 */
752 return;
753
754 /* Get the top transaction. */
755 toptxn = rbtxn_get_toptxn(txn);
756
757 /*
758 * Indicate a partial change for toast inserts. The change will be
759 * considered as complete once we get the insert or update on the main
760 * table and we are sure that the pending toast chunks are not required
761 * anymore.
762 *
763 * If we allow streaming when there are pending toast chunks then such
764 * chunks won't be released till the insert (multi_insert) is complete and
765 * we expect the txn to have streamed all changes after streaming. This
766 * restriction is mainly to ensure the correctness of streamed
767 * transactions and it doesn't seem worth uplifting such a restriction
768 * just to allow this case because anyway we will stream the transaction
769 * once such an insert is complete.
770 */
771 if (toast_insert)
773 else if (rbtxn_has_partial_change(toptxn) &&
774 IsInsertOrUpdate(change->action) &&
777
778 /*
779 * Indicate a partial change for speculative inserts. The change will be
780 * considered as complete once we get the speculative confirm or abort
781 * token.
782 */
783 if (IsSpecInsert(change->action))
785 else if (rbtxn_has_partial_change(toptxn) &&
788
789 /*
790 * Stream the transaction if it is serialized before and the changes are
791 * now complete in the top-level transaction.
792 *
793 * The reason for doing the streaming of such a transaction as soon as we
794 * get the complete change for it is that previously it would have reached
795 * the memory threshold and wouldn't get streamed because of incomplete
796 * changes. Delaying such transactions would increase apply lag for them.
797 */
799 !(rbtxn_has_partial_change(toptxn)) &&
800 rbtxn_is_serialized(txn) &&
802 ReorderBufferStreamTXN(rb, toptxn);
803}

References ReorderBufferChange::action, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::data, fb(), IsInsertOrUpdate, IsSpecConfirmOrAbort, IsSpecInsert, rbtxn_get_toptxn, RBTXN_HAS_PARTIAL_CHANGE, rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_serialized, ReorderBufferCanStartStreaming(), ReorderBufferCanStream(), ReorderBufferStreamTXN(), ReorderBufferChange::tp, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferQueueChange().
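
The function maintains a "partial change" flag on the top-level transaction: toast chunks and speculative inserts set it, and the change that completes them clears it; a serialized transaction may be streamed as soon as the flag is clear again. A reduced sketch of that flag protocol, with made-up ChangeKind/TopTxn types and none of the real streaming machinery.

#include <stdbool.h>
#include <stdio.h>

typedef enum ChangeKind
{
    CHANGE_TOAST_INSERT,            /* starts a partial change */
    CHANGE_INSERT_OR_UPDATE,        /* completes pending toast chunks */
    CHANGE_SPEC_INSERT,             /* starts a partial change */
    CHANGE_SPEC_CONFIRM_OR_ABORT    /* completes a speculative insert */
} ChangeKind;

typedef struct TopTxn
{
    bool    has_partial_change;
    bool    is_serialized;          /* already spilled to disk */
} TopTxn;

/* Returns true when this change makes a spilled transaction streamable. */
static bool
track_partial_change(TopTxn *top, ChangeKind kind)
{
    switch (kind)
    {
        case CHANGE_TOAST_INSERT:
        case CHANGE_SPEC_INSERT:
            top->has_partial_change = true;
            break;
        case CHANGE_INSERT_OR_UPDATE:
        case CHANGE_SPEC_CONFIRM_OR_ABORT:
            top->has_partial_change = false;
            break;
    }

    return !top->has_partial_change && top->is_serialized;
}

int
main(void)
{
    TopTxn  top = {false, true};

    (void) track_partial_change(&top, CHANGE_TOAST_INSERT);
    if (track_partial_change(&top, CHANGE_INSERT_OR_UPDATE))
        printf("changes complete again: stream the transaction now\n");
    return 0;
}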

◆ ReorderBufferProcessTXN()

static void ReorderBufferProcessTXN ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
XLogRecPtr  commit_lsn,
volatile Snapshot  snapshot_now,
volatile CommandId  command_id,
bool  streaming 
)
static

Definition at line 2211 of file reorderbuffer.c.

2216{
2217 bool using_subtxn;
2223 volatile bool stream_started = false;
2224 ReorderBufferTXN *volatile curtxn = NULL;
2225
2226 /* build data to be able to lookup the CommandIds of catalog tuples */
2228
2229 /* setup the initial snapshot */
2230 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2231
2232 /*
2233 * Decoding needs access to syscaches et al., which in turn use
2234 * heavyweight locks and such. Thus we need to have enough state around to
2235 * keep track of those. The easiest way is to simply use a transaction
2236 * internally. That also allows us to easily enforce that nothing writes
2237 * to the database by checking for xid assignments.
2238 *
2239 * When we're called via the SQL SRF there's already a transaction
2240 * started, so start an explicit subtransaction there.
2241 */
2243
2244 PG_TRY();
2245 {
2246 ReorderBufferChange *change;
2247 int changes_count = 0; /* used to accumulate the number of
2248 * changes */
2249
2250 if (using_subtxn)
2251 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2252 else
2254
2255 /*
2256 * We only need to send begin/begin-prepare for non-streamed
2257 * transactions.
2258 */
2259 if (!streaming)
2260 {
2261 if (rbtxn_is_prepared(txn))
2262 rb->begin_prepare(rb, txn);
2263 else
2264 rb->begin(rb, txn);
2265 }
2266
2268 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2269 {
2270 Relation relation = NULL;
2271 Oid reloid;
2272
2274
2275 /*
2276 * We can't call start stream callback before processing first
2277 * change.
2278 */
2280 {
2281 if (streaming)
2282 {
2283 txn->origin_id = change->origin_id;
2284 rb->stream_start(rb, txn, change->lsn);
2285 stream_started = true;
2286 }
2287 }
2288
2289 /*
2290 * Enforce correct ordering of changes, merged from multiple
2291 * subtransactions. The changes may have the same LSN due to
2292 * MULTI_INSERT xlog records.
2293 */
2295
2296 prev_lsn = change->lsn;
2297
2298 /*
2299 * Set the current xid to detect concurrent aborts. This is
2300 * required for the cases when we decode the changes before the
2301 * COMMIT record is processed.
2302 */
2303 if (streaming || rbtxn_is_prepared(change->txn))
2304 {
2305 curtxn = change->txn;
2307 }
2308
2309 switch (change->action)
2310 {
2312
2313 /*
2314 * Confirmation for speculative insertion arrived. Simply
2315 * use as a normal record. It'll be cleaned up at the end
2316 * of INSERT processing.
2317 */
2318 if (specinsert == NULL)
2319 elog(ERROR, "invalid ordering of speculative insertion changes");
2320 Assert(specinsert->data.tp.oldtuple == NULL);
2321 change = specinsert;
2323
2324 /* intentionally fall through */
2328 Assert(snapshot_now);
2329
2330 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2331 change->data.tp.rlocator.relNumber);
2332
2333 /*
2334 * Mapped catalog tuple without data, emitted while
2335 * catalog table was in the process of being rewritten. We
2336 * can fail to look up the relfilenumber, because the
2337 * relmapper has no "historic" view, in contrast to the
2338 * normal catalog during decoding. Thus repeated rewrites
2339 * can cause a lookup failure. That's OK because we do not
2340 * decode catalog changes anyway. Normally such tuples
2341 * would be skipped over below, but we can't identify
2342 * whether the table should be logically logged without
2343 * mapping the relfilenumber to the oid.
2344 */
2345 if (reloid == InvalidOid &&
2346 change->data.tp.newtuple == NULL &&
2347 change->data.tp.oldtuple == NULL)
2348 goto change_done;
2349 else if (reloid == InvalidOid)
2350 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2351 relpathperm(change->data.tp.rlocator,
2352 MAIN_FORKNUM).str);
2353
2354 relation = RelationIdGetRelation(reloid);
2355
2356 if (!RelationIsValid(relation))
2357 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2358 reloid,
2359 relpathperm(change->data.tp.rlocator,
2360 MAIN_FORKNUM).str);
2361
2362 if (!RelationIsLogicallyLogged(relation))
2363 goto change_done;
2364
2365 /*
2366 * Ignore temporary heaps created during DDL unless the
2367 * plugin has asked for them.
2368 */
2369 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2370 goto change_done;
2371
2372 /*
2373 * For now ignore sequence changes entirely. Most of the
2374 * time they don't log changes using records we
2375 * understand, so it doesn't make sense to handle the few
2376 * cases we do.
2377 */
2378 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2379 goto change_done;
2380
2381 /* user-triggered change */
2382 if (!IsToastRelation(relation))
2383 {
2384 ReorderBufferToastReplace(rb, txn, relation, change);
2385 ReorderBufferApplyChange(rb, txn, relation, change,
2386 streaming);
2387
2388 /*
2389 * Only clear reassembled toast chunks if we're sure
2390 * they're not required anymore. The creator of the
2391 * tuple tells us.
2392 */
2393 if (change->data.tp.clear_toast_afterwards)
2395 }
2396 /* we're not interested in toast deletions */
2397 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2398 {
2399 /*
2400 * Need to reassemble the full toasted Datum in
2401 * memory, to ensure the chunks don't get reused till
 2402 * we're done; remove it from the list of this
2403 * transaction's changes. Otherwise it will get
2404 * freed/reused while restoring spooled data from
2405 * disk.
2406 */
2407 Assert(change->data.tp.newtuple != NULL);
2408
2409 dlist_delete(&change->node);
2410 ReorderBufferToastAppendChunk(rb, txn, relation,
2411 change);
2412 }
2413
2415
2416 /*
2417 * If speculative insertion was confirmed, the record
2418 * isn't needed anymore.
2419 */
2420 if (specinsert != NULL)
2421 {
2423 specinsert = NULL;
2424 }
2425
2426 if (RelationIsValid(relation))
2427 {
2428 RelationClose(relation);
2429 relation = NULL;
2430 }
2431 break;
2432
2434
2435 /*
2436 * Speculative insertions are dealt with by delaying the
2437 * processing of the insert until the confirmation record
2438 * arrives. For that we simply unlink the record from the
2439 * chain, so it does not get freed/reused while restoring
2440 * spooled data from disk.
2441 *
2442 * This is safe in the face of concurrent catalog changes
2443 * because the relevant relation can't be changed between
2444 * speculative insertion and confirmation due to
2445 * CheckTableNotInUse() and locking.
2446 */
2447
2448 /* clear out a pending (and thus failed) speculation */
2449 if (specinsert != NULL)
2450 {
2452 specinsert = NULL;
2453 }
2454
2455 /* and memorize the pending insertion */
2456 dlist_delete(&change->node);
2457 specinsert = change;
2458 break;
2459
2461
2462 /*
2463 * Abort for speculative insertion arrived. So cleanup the
2464 * specinsert tuple and toast hash.
2465 *
2466 * Note that we get the spec abort change for each toast
2467 * entry but we need to perform the cleanup only the first
2468 * time we get it for the main table.
2469 */
2470 if (specinsert != NULL)
2471 {
2472 /*
2473 * We must clean the toast hash before processing a
2474 * completely new tuple to avoid confusion about the
2475 * previous tuple's toast chunks.
2476 */
2479
2480 /* We don't need this record anymore. */
2482 specinsert = NULL;
2483 }
2484 break;
2485
2487 {
2488 int i;
2489 int nrelids = change->data.truncate.nrelids;
2490 int nrelations = 0;
2491 Relation *relations;
2492
2493 relations = palloc0(nrelids * sizeof(Relation));
2494 for (i = 0; i < nrelids; i++)
2495 {
2496 Oid relid = change->data.truncate.relids[i];
2497 Relation rel;
2498
2499 rel = RelationIdGetRelation(relid);
2500
2501 if (!RelationIsValid(rel))
2502 elog(ERROR, "could not open relation with OID %u", relid);
2503
2504 if (!RelationIsLogicallyLogged(rel))
2505 continue;
2506
2507 relations[nrelations++] = rel;
2508 }
2509
2510 /* Apply the truncate. */
2512 relations, change,
2513 streaming);
2514
2515 for (i = 0; i < nrelations; i++)
2516 RelationClose(relations[i]);
2517
2518 break;
2519 }
2520
2522 ReorderBufferApplyMessage(rb, txn, change, streaming);
2523 break;
2524
2526 /* Execute the invalidation messages locally */
2528 change->data.inval.invalidations);
2529 break;
2530
2532 /* get rid of the old */
2534
2535 if (snapshot_now->copied)
2536 {
2537 ReorderBufferFreeSnap(rb, snapshot_now);
2538 snapshot_now =
2540 txn, command_id);
2541 }
2542
2543 /*
2544 * Restored from disk, need to be careful not to double
2545 * free. We could introduce refcounting for that, but for
2546 * now this seems infrequent enough not to care.
2547 */
2548 else if (change->data.snapshot->copied)
2549 {
2550 snapshot_now =
2552 txn, command_id);
2553 }
2554 else
2555 {
2556 snapshot_now = change->data.snapshot;
2557 }
2558
2559 /* and continue with the new one */
2560 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2561 break;
2562
2565
2566 if (command_id < change->data.command_id)
2567 {
2568 command_id = change->data.command_id;
2569
2570 if (!snapshot_now->copied)
2571 {
2572 /* we don't use the global one anymore */
2573 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2574 txn, command_id);
2575 }
2576
2577 snapshot_now->curcid = command_id;
2578
2580 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2581 }
2582
2583 break;
2584
2586 elog(ERROR, "tuplecid value in changequeue");
2587 break;
2588 }
2589
2590 /*
2591 * It is possible that the data is not sent to downstream for a
2592 * long time either because the output plugin filtered it or there
2593 * is a DDL that generates a lot of data that is not processed by
2594 * the plugin. So, in such cases, the downstream can timeout. To
2595 * avoid that we try to send a keepalive message if required.
2596 * Trying to send a keepalive message after every change has some
2597 * overhead, but testing showed there is no noticeable overhead if
2598 * we do it after every ~100 changes.
2599 */
2600#define CHANGES_THRESHOLD 100
2601
2603 {
2604 rb->update_progress_txn(rb, txn, prev_lsn);
2605 changes_count = 0;
2606 }
2607 }
2608
2609 /* speculative insertion record must be freed by now */
2611
2612 /* clean up the iterator */
2614 iterstate = NULL;
2615
2616 /*
2617 * Update total transaction count and total bytes processed by the
2618 * transaction and its subtransactions. Ensure to not count the
2619 * streamed transaction multiple times.
2620 *
2621 * Note that the statistics computation has to be done after
2622 * ReorderBufferIterTXNFinish as it releases the serialized change
2623 * which we have already accounted in ReorderBufferIterTXNNext.
2624 */
2625 if (!rbtxn_is_streamed(txn))
2626 rb->totalTxns++;
2627
2628 rb->totalBytes += txn->total_size;
2629
2630 /*
2631 * Done with current changes, send the last message for this set of
2632 * changes depending upon streaming mode.
2633 */
2634 if (streaming)
2635 {
2636 if (stream_started)
2637 {
2638 rb->stream_stop(rb, txn, prev_lsn);
2639 stream_started = false;
2640 }
2641 }
2642 else
2643 {
2644 /*
2645 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2646 * regular ones).
2647 */
2648 if (rbtxn_is_prepared(txn))
2649 {
2651 rb->prepare(rb, txn, commit_lsn);
2653 }
2654 else
2655 rb->commit(rb, txn, commit_lsn);
2656 }
2657
2658 /* this is just a sanity check against bad output plugin behaviour */
2660 elog(ERROR, "output plugin used XID %u",
2662
2663 /*
2664 * Remember the command ID and snapshot for the next set of changes in
2665 * streaming mode.
2666 */
2667 if (streaming)
2668 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2669 else if (snapshot_now->copied)
2670 ReorderBufferFreeSnap(rb, snapshot_now);
2671
2672 /* cleanup */
2674
2675 /*
2676 * Aborting the current (sub-)transaction as a whole has the right
2677 * semantics. We want all locks acquired in here to be released, not
2678 * reassigned to the parent and we do not want any database access
2679 * have persistent effects.
2680 */
2682
2683 /* make sure there's no cache pollution */
2685 {
2688 }
2689 else
2690 {
2694 }
2695
2696 if (using_subtxn)
2697 {
2700 CurrentResourceOwner = cowner;
2701 }
2702
2703 /*
2704 * We are here due to one of the four reasons: 1. Decoding an
2705 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2706 * prepared txn that was (partially) streamed. 4. Decoding a committed
2707 * txn.
2708 *
2709 * For 1, we allow truncation of txn data by removing the changes
2710 * already streamed but still keeping other things like invalidations,
2711 * snapshot, and tuplecids. For 2 and 3, we indicate
2712 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2713 * data as the entire transaction has been decoded except for commit.
2714 * For 4, as the entire txn has been decoded, we can fully clean up
2715 * the TXN reorder buffer.
2716 */
2717 if (streaming || rbtxn_is_prepared(txn))
2718 {
2719 if (streaming)
2721
2723 /* Reset the CheckXidAlive */
2725 }
2726 else
2728 }
2729 PG_CATCH();
2730 {
2733
2734 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2735 if (iterstate)
2737
2739
2740 /*
2741 * Force cache invalidation to happen outside of a valid transaction
2742 * to prevent catalog access as we just caught an error.
2743 */
2745
2746 /* make sure there's no cache pollution */
2748 {
2751 }
2752 else
2753 {
2757 }
2758
2759 if (using_subtxn)
2760 {
2763 CurrentResourceOwner = cowner;
2764 }
2765
2766 /*
2767 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2768 * abort of the (sub)transaction we are streaming or preparing. We
2769 * need to do the cleanup and return gracefully on this error, see
2770 * SetupCheckXidLive.
2771 *
2772 * This error code can be thrown by one of the callbacks we call
2773 * during decoding so we need to ensure that we return gracefully only
2774 * when we are sending the data in streaming mode and the streaming is
2775 * not finished yet or when we are sending the data out on a PREPARE
2776 * during a two-phase commit.
2777 */
2778 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2780 {
2781 /* curtxn must be set for streaming or prepared transactions */
2782 Assert(curtxn);
2783
2784 /* Cleanup the temporary error state. */
2787 errdata = NULL;
2788
2789 /* Remember the transaction is aborted. */
2791 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2792
2793 /* Mark the transaction is streamed if appropriate */
2794 if (stream_started)
2796
2797 /* Reset the TXN so that it is allowed to stream remaining data. */
2798 ReorderBufferResetTXN(rb, txn, snapshot_now,
2799 command_id, prev_lsn,
2800 specinsert);
2801 }
2802 else
2803 {
2806 PG_RE_THROW();
2807 }
2808 }
2809 PG_END_TRY();
2810}

References AbortCurrentTransaction(), ReorderBufferChange::action, Assert, BeginInternalSubTransaction(), CHANGES_THRESHOLD, CHECK_FOR_INTERRUPTS, CheckXidAlive, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::command_id, SnapshotData::copied, CopyErrorData(), SnapshotData::curcid, CurrentMemoryContext, CurrentResourceOwner, ReorderBufferChange::data, data, dlist_delete(), elog, ERROR, fb(), FlushErrorState(), FreeErrorData(), GetCurrentTransactionId(), GetCurrentTransactionIdIfAny(), i, ReorderBufferChange::inval, InvalidateSystemCaches(), ReorderBufferChange::invalidations, ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidCommandId, InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsToastRelation(), IsTransactionOrTransactionBlock(), ReorderBufferChange::lsn, MAIN_FORKNUM, MemoryContextSwitchTo(), ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::ninvalidations_distributed, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::origin_id, ReorderBufferTXN::origin_id, palloc0(), PG_CATCH, PG_END_TRY, PG_RE_THROW, PG_TRY, rbtxn_distr_inval_overflowed, RBTXN_IS_ABORTED, rbtxn_is_committed, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, RelationData::rd_rel, RelationClose(), RelationIdGetRelation(), RelationIsLogicallyLogged, RelationIsValid, RelidByRelfilenumber(), ReorderBufferChange::relids, RelFileLocator::relNumber, relpathperm, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferApplyChange(), ReorderBufferApplyMessage(), ReorderBufferApplyTruncate(), ReorderBufferBuildTupleCidHash(), ReorderBufferCleanupTXN(), ReorderBufferCopySnap(), ReorderBufferExecuteInvalidations(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNInit(), ReorderBufferIterTXNNext(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferResetTXN(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastAppendChunk(), ReorderBufferToastReplace(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferChange::rlocator, RollbackAndReleaseCurrentSubTransaction(), SetupCheckXidLive(), SetupHistoricSnapshot(), ReorderBufferChange::snapshot, RelFileLocator::spcOid, StartTransactionCommand(), TeardownHistoricSnapshot(), ReorderBufferTXN::total_size, ReorderBufferChange::tp, ReorderBufferChange::truncate, ReorderBufferTXN::tuplecid_hash, ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by ReorderBufferReplay(), and ReorderBufferStreamTXN().
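
Stripped of the per-type handling, the body is a loop that pulls changes in LSN order from the iterator, applies each one, and every CHANGES_THRESHOLD (100) changes reports progress so a quiet downstream connection does not time out. Below is a skeletal version of that loop shape; the Change struct, the callbacks, and the NextChangeFn iterator are invented for the example.

#include <stddef.h>

#define KEEPALIVE_THRESHOLD 100     /* mirrors CHANGES_THRESHOLD */

typedef struct Change
{
    int                 kind;
    unsigned long long  lsn;
} Change;

typedef const Change *(*NextChangeFn) (void *iter);

typedef struct ApplyCallbacks
{
    void    (*apply_change) (const Change *change);
    void    (*update_progress) (unsigned long long lsn);
} ApplyCallbacks;

static void
process_changes(void *iter, NextChangeFn next_change, const ApplyCallbacks *cb)
{
    const Change   *change;
    int             changes_count = 0;

    while ((change = next_change(iter)) != NULL)
    {
        cb->apply_change(change);

        /*
         * Report progress only every so often: doing it after each change
         * would add overhead without helping the downstream.
         */
        if (++changes_count >= KEEPALIVE_THRESHOLD)
        {
            cb->update_progress(change->lsn);
            changes_count = 0;
        }
    }
}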

◆ ReorderBufferProcessXid()

void ReorderBufferProcessXid ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3295 of file reorderbuffer.c.

3296{
3297 /* many records won't have an xid assigned, centralize check here */
3298 if (xid != InvalidTransactionId)
3299 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3300}

References fb(), InvalidTransactionId, and ReorderBufferTXNByXid().

Referenced by heap2_decode(), heap_decode(), LogicalDecodingProcessRecord(), logicalmsg_decode(), standby_decode(), xact_decode(), and xlog_decode().

◆ ReorderBufferQueueChange()

void ReorderBufferQueueChange ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  lsn,
ReorderBufferChange *  change,
bool  toast_insert 
)

Definition at line 810 of file reorderbuffer.c.

812{
813 ReorderBufferTXN *txn;
814
815 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
816
817 /*
818 * If we have detected that the transaction is aborted while streaming the
819 * previous changes or by checking its CLOG, there is no point in
820 * collecting further changes for it.
821 */
822 if (rbtxn_is_aborted(txn))
823 {
824 /*
825 * We don't need to update memory accounting for this change as we
826 * have not added it to the queue yet.
827 */
828 ReorderBufferFreeChange(rb, change, false);
829 return;
830 }
831
832 /*
833 * The changes that are sent downstream are considered streamable. We
834 * remember such transactions so that only those will later be considered
835 * for streaming.
836 */
837 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
843 {
844 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
845
847 }
848
849 change->lsn = lsn;
850 change->txn = txn;
851
853 dlist_push_tail(&txn->changes, &change->node);
854 txn->nentries++;
855 txn->nentries_mem++;
856
857 /* update memory accounting information */
860
861 /* process partial change */
863
864 /* check the memory limits and evict something if needed */
866}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_push_tail(), fb(), ReorderBufferChange::lsn, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_get_toptxn, RBTXN_HAS_STREAMABLE_CHANGE, rbtxn_is_aborted, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCheckMemoryLimit(), ReorderBufferFreeChange(), ReorderBufferProcessPartialChange(), ReorderBufferTXNByXid(), ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeSpecConfirm(), DecodeTruncate(), DecodeUpdate(), ReorderBufferAddNewCommandId(), ReorderBufferAddSnapshot(), ReorderBufferQueueInvalidations(), and ReorderBufferQueueMessage().
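
Queuing a change amounts to: find or create the transaction, drop the change immediately if the transaction is already known to be aborted, append it to the transaction's change list, update the memory accounting, and finally check the memory limit. A stripped-down sketch of that append-and-account path with hypothetical Txn/Change/Buffer structures and a stubbed-out eviction step.

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

typedef struct Change
{
    size_t          size;
    struct Change  *next;
} Change;

typedef struct Txn
{
    bool        is_aborted;
    size_t      nentries;
    size_t      total_size;
    Change     *head;
    Change     *tail;
} Txn;

typedef struct Buffer
{
    size_t      size;               /* memory used across all transactions */
    size_t      memory_limit;
} Buffer;

static void
check_memory_limit(Buffer *rb)
{
    /* In the real code this evicts (spills or streams) the largest txn. */
    (void) rb;
}

static void
queue_change(Buffer *rb, Txn *txn, Change *change)
{
    if (txn->is_aborted)
    {
        free(change);               /* no point collecting further changes */
        return;
    }

    /* append to the per-transaction list */
    change->next = NULL;
    if (txn->tail)
        txn->tail->next = change;
    else
        txn->head = change;
    txn->tail = change;
    txn->nentries++;

    /* update memory accounting, then react if we went over the limit */
    txn->total_size += change->size;
    rb->size += change->size;
    if (rb->size >= rb->memory_limit)
        check_memory_limit(rb);
}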

◆ ReorderBufferQueueInvalidations()

◆ ReorderBufferQueueMessage()

void ReorderBufferQueueMessage ( ReorderBuffer *  rb,
TransactionId  xid,
Snapshot  snap,
XLogRecPtr  lsn,
bool  transactional,
const char *  prefix,
Size  message_size,
const char *  message 
)

Definition at line 873 of file reorderbuffer.c.

877{
878 if (transactional)
879 {
880 MemoryContext oldcontext;
881 ReorderBufferChange *change;
882
884
885 /*
886 * We don't expect snapshots for transactional changes - we'll use the
887 * snapshot derived later during apply (unless the change gets
888 * skipped).
889 */
890 Assert(!snap);
891
892 oldcontext = MemoryContextSwitchTo(rb->context);
893
896 change->data.msg.prefix = pstrdup(prefix);
897 change->data.msg.message_size = message_size;
898 change->data.msg.message = palloc(message_size);
899 memcpy(change->data.msg.message, message, message_size);
900
901 ReorderBufferQueueChange(rb, xid, lsn, change, false);
902
903 MemoryContextSwitchTo(oldcontext);
904 }
905 else
906 {
907 ReorderBufferTXN *txn = NULL;
908 volatile Snapshot snapshot_now = snap;
909
910 /* Non-transactional changes require a valid snapshot. */
911 Assert(snapshot_now);
912
913 if (xid != InvalidTransactionId)
914 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
915
916 /* setup snapshot to allow catalog access */
917 SetupHistoricSnapshot(snapshot_now, NULL);
918 PG_TRY();
919 {
920 rb->message(rb, txn, lsn, false, prefix, message_size, message);
921
923 }
924 PG_CATCH();
925 {
927 PG_RE_THROW();
928 }
929 PG_END_TRY();
930 }
931}

References ReorderBufferChange::action, Assert, ReorderBufferChange::data, fb(), InvalidTransactionId, MemoryContextSwitchTo(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, palloc(), PG_CATCH, PG_END_TRY, PG_RE_THROW, PG_TRY, ReorderBufferChange::prefix, pstrdup(), REORDER_BUFFER_CHANGE_MESSAGE, ReorderBufferAllocChange(), ReorderBufferQueueChange(), ReorderBufferTXNByXid(), SetupHistoricSnapshot(), and TeardownHistoricSnapshot().

Referenced by logicalmsg_decode().
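
Transactional messages are copied into the buffer and delivered later with the rest of the transaction, whereas non-transactional messages are handed to the output plugin right away under the caller-supplied snapshot. A small sketch of that branch; the Message struct and the two callbacks are invented, and the snapshot handling is omitted.

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

typedef struct Message
{
    char   *prefix;                 /* NUL-terminated */
    size_t  message_size;
    char   *message;
} Message;

typedef void (*QueueFn) (Message *copy);    /* buffer for later replay */
typedef void (*EmitFn) (const char *prefix, size_t sz, const char *msg);

static void
queue_message(bool transactional,
              const char *prefix, size_t message_size, const char *message,
              QueueFn queue_for_replay, EmitFn emit_now)
{
    if (transactional)
    {
        /* copy everything: the caller's buffers may be gone by replay time */
        Message    *copy = malloc(sizeof(Message));
        size_t      prefix_size = strlen(prefix) + 1;

        copy->prefix = malloc(prefix_size);
        memcpy(copy->prefix, prefix, prefix_size);
        copy->message_size = message_size;
        copy->message = malloc(message_size);
        memcpy(copy->message, message, message_size);

        queue_for_replay(copy);
    }
    else
    {
        /* deliver immediately; nothing is buffered or copied */
        emit_now(prefix, message_size, message);
    }
}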

◆ ReorderBufferRememberPrepareInfo()

bool ReorderBufferRememberPrepareInfo ( ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  prepare_lsn,
XLogRecPtr  end_lsn,
TimestampTz  prepare_time,
RepOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2907 of file reorderbuffer.c.

2911{
2912 ReorderBufferTXN *txn;
2913
2914 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2915
2916 /* unknown transaction, nothing to do */
2917 if (txn == NULL)
2918 return false;
2919
2920 /*
2921 * Remember the prepare information to be later used by commit prepared in
2922 * case we skip doing prepare.
2923 */
2924 txn->final_lsn = prepare_lsn;
2925 txn->end_lsn = end_lsn;
2926 txn->prepare_time = prepare_time;
2927 txn->origin_id = origin_id;
2928 txn->origin_lsn = origin_lsn;
2929
2930 /* Mark this transaction as a prepared transaction */
2933
2934 return true;
2935}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferReplay()

static void ReorderBufferReplay ( ReorderBufferTXN *  txn,
ReorderBuffer *  rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
RepOriginId  origin_id,
XLogRecPtr  origin_lsn 
)
static

Definition at line 2823 of file reorderbuffer.c.

2828{
2829 Snapshot snapshot_now;
2830 CommandId command_id = FirstCommandId;
2831
2832 txn->final_lsn = commit_lsn;
2833 txn->end_lsn = end_lsn;
2834 txn->commit_time = commit_time;
2835 txn->origin_id = origin_id;
2836 txn->origin_lsn = origin_lsn;
2837
2838 /*
2839 * If the transaction was (partially) streamed, we need to commit it in a
2840 * 'streamed' way. That is, we first stream the remaining part of the
2841 * transaction, and then invoke stream_commit message.
2842 *
2843 * Called after everything (origin ID, LSN, ...) is stored in the
2844 * transaction to avoid passing that information directly.
2845 */
2846 if (rbtxn_is_streamed(txn))
2847 {
2849 return;
2850 }
2851
2852 /*
2853 * If this transaction has no snapshot, it didn't make any changes to the
2854 * database, so there's nothing to decode. Note that
2855 * ReorderBufferCommitChild will have transferred any snapshots from
2856 * subtransactions if there were any.
2857 */
2858 if (txn->base_snapshot == NULL)
2859 {
2860 Assert(txn->ninvalidations == 0);
2861
2862 /*
2863 * Removing this txn before a commit might result in the computation
2864 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2865 */
2866 if (!rbtxn_is_prepared(txn))
2868 return;
2869 }
2870
2871 snapshot_now = txn->base_snapshot;
2872
2873 /* Process and send the changes to output plugin. */
2874 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2875 command_id, false);
2876}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, FirstCommandId, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferProcessTXN(), and ReorderBufferStreamCommit().

Referenced by ReorderBufferCommit(), ReorderBufferFinishPrepared(), and ReorderBufferPrepare().

◆ ReorderBufferResetTXN()

static void ReorderBufferResetTXN ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
Snapshot  snapshot_now,
CommandId  command_id,
XLogRecPtr  last_lsn,
ReorderBufferChange *  specinsert 
)
static

Definition at line 2165 of file reorderbuffer.c.

2170{
2171 /* Discard the changes that we just streamed */
2173
2174 /* Free all resources allocated for toast reconstruction */
2176
2177 /* Return the spec insert change if it is not NULL */
2178 if (specinsert != NULL)
2179 {
2181 specinsert = NULL;
2182 }
2183
2184 /*
2185 * For the streaming case, stop the stream and remember the command ID and
2186 * snapshot for the streaming run.
2187 */
2188 if (rbtxn_is_streamed(txn))
2189 {
2190 rb->stream_stop(rb, txn, last_lsn);
2191 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2192 }
2193
2194 /* All changes must be deallocated */
2195 Assert(txn->size == 0);
2196}

References Assert, fb(), rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferFreeChange(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::size.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferRestoreChange()

static void ReorderBufferRestoreChange ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
char *  data 
)
static

Definition at line 4687 of file reorderbuffer.c.

4689{
4691 ReorderBufferChange *change;
4692
4693 ondisk = (ReorderBufferDiskChange *) data;
4694
4695 change = ReorderBufferAllocChange(rb);
4696
4697 /* copy static part */
4698 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4699
4700 data += sizeof(ReorderBufferDiskChange);
4701
4702 /* restore individual stuff */
4703 switch (change->action)
4704 {
4705 /* fall through these, they're all similar enough */
4710 if (change->data.tp.oldtuple)
4711 {
4712 uint32 tuplelen = ((HeapTuple) data)->t_len;
4713
4714 change->data.tp.oldtuple =
4716
4717 /* restore ->tuple */
4718 memcpy(change->data.tp.oldtuple, data,
4719 sizeof(HeapTupleData));
4720 data += sizeof(HeapTupleData);
4721
4722 /* reset t_data pointer into the new tuplebuf */
4723 change->data.tp.oldtuple->t_data =
4724 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4725
4726 /* restore tuple data itself */
4728 data += tuplelen;
4729 }
4730
4731 if (change->data.tp.newtuple)
4732 {
4733 /* here, data might not be suitably aligned! */
4735
4737 sizeof(uint32));
4738
4739 change->data.tp.newtuple =
4741
4742 /* restore ->tuple */
4743 memcpy(change->data.tp.newtuple, data,
4744 sizeof(HeapTupleData));
4745 data += sizeof(HeapTupleData);
4746
4747 /* reset t_data pointer into the new tuplebuf */
4748 change->data.tp.newtuple->t_data =
4749 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4750
4751 /* restore tuple data itself */
4753 data += tuplelen;
4754 }
4755
4756 break;
4758 {
4759 Size prefix_size;
4760
4761 /* read prefix */
4762 memcpy(&prefix_size, data, sizeof(Size));
4763 data += sizeof(Size);
4764 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4765 prefix_size);
4766 memcpy(change->data.msg.prefix, data, prefix_size);
4767 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4768 data += prefix_size;
4769
4770 /* read the message */
4771 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4772 data += sizeof(Size);
4773 change->data.msg.message = MemoryContextAlloc(rb->context,
4774 change->data.msg.message_size);
4775 memcpy(change->data.msg.message, data,
4776 change->data.msg.message_size);
4777 data += change->data.msg.message_size;
4778
4779 break;
4780 }
4782 {
4784 change->data.inval.ninvalidations;
4785
4786 change->data.inval.invalidations =
4787 MemoryContextAlloc(rb->context, inval_size);
4788
4789 /* read the message */
4791
4792 break;
4793 }
4795 {
4798 Size size;
4799
4800 oldsnap = (Snapshot) data;
4801
4802 size = sizeof(SnapshotData) +
4803 sizeof(TransactionId) * oldsnap->xcnt +
4804 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4805
4806 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4807
4808 newsnap = change->data.snapshot;
4809
4810 memcpy(newsnap, data, size);
4811 newsnap->xip = (TransactionId *)
4812 (((char *) newsnap) + sizeof(SnapshotData));
4813 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4814 newsnap->copied = true;
4815 break;
4816 }
4817 /* the base struct contains all the data, easy peasy */
4819 {
4820 Oid *relids;
4821
4822 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4823 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4824 change->data.truncate.relids = relids;
4825
4826 break;
4827 }
4832 break;
4833 }
4834
4835 dlist_push_tail(&txn->changes, &change->node);
4836 txn->nentries_mem++;
4837
4838 /*
4839 * Update memory accounting for the restored change. We need to do this
4840 * although we don't check the memory limit when restoring the changes in
4841 * this branch (we only do that when initially queueing the changes after
4842 * decoding), because we will release the changes later, and that will
4843 * update the accounting too (subtracting the size from the counters). And
4844 * we don't want to underflow there.
4845 */
4847 ReorderBufferChangeSize(change));
4848}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, ReorderBufferTXN::changes, ReorderBufferChange::data, data, dlist_push_tail(), fb(), HEAPTUPLESIZE, ReorderBufferChange::inval, ReorderBufferChange::invalidations, MemoryContextAlloc(), MemoryContextAllocZero(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferTXN::nentries_mem, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferAllocChange(), ReorderBufferAllocRelids(), ReorderBufferAllocTupleBuf(), ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SizeofHeapTupleHeader, ReorderBufferChange::snapshot, HeapTupleData::t_data, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferRestoreChanges().
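
A spilled change is a fixed-size header embedding the in-memory struct, followed by a type-dependent variable payload; restoring means copying the header into a fresh change and reattaching each variable-length piece from the byte stream, using memcpy for the length fields because the data may be unaligned. The sketch below applies the same walk-the-buffer pattern to a made-up two-field record (length-prefixed prefix plus message body); it is not the backend's actual on-disk layout.

#include <stdlib.h>
#include <string.h>

typedef struct DiskHeader
{
    size_t  total_size;             /* header plus payload */
    int     kind;
} DiskHeader;

typedef struct RestoredMessage
{
    int     kind;
    char   *prefix;                 /* NUL-terminated */
    size_t  message_size;
    char   *message;
} RestoredMessage;

/* data points at one complete serialized record (size already validated). */
static RestoredMessage *
restore_message(const char *data)
{
    const DiskHeader   *hdr = (const DiskHeader *) data;
    RestoredMessage    *change = malloc(sizeof(RestoredMessage));
    size_t              prefix_size;

    change->kind = hdr->kind;
    data += sizeof(DiskHeader);

    /* length-prefixed prefix string; memcpy the length, it may be unaligned */
    memcpy(&prefix_size, data, sizeof(size_t));
    data += sizeof(size_t);
    change->prefix = malloc(prefix_size);
    memcpy(change->prefix, data, prefix_size);
    data += prefix_size;

    /* length-prefixed message body */
    memcpy(&change->message_size, data, sizeof(size_t));
    data += sizeof(size_t);
    change->message = malloc(change->message_size);
    memcpy(change->message, data, change->message_size);

    return change;
}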

◆ ReorderBufferRestoreChanges()

static Size ReorderBufferRestoreChanges ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
TXNEntryFile *  file,
XLogSegNo *  segno 
)
static

Definition at line 4544 of file reorderbuffer.c.

4546{
4547 Size restored = 0;
4550 File *fd = &file->vfd;
4551
4554
4555 /* free current entries, so we have memory for more */
4557 {
4560
4561 dlist_delete(&cleanup->node);
4563 }
4564 txn->nentries_mem = 0;
4566
4568
4569 while (restored < max_changes_in_memory && *segno <= last_segno)
4570 {
4571 int readBytes;
4573
4575
4576 if (*fd == -1)
4577 {
4578 char path[MAXPGPATH];
4579
4580 /* first time in */
4581 if (*segno == 0)
4582 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4583
4584 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4585
4586 /*
4587 * No need to care about TLIs here, only used during a single run,
4588 * so each LSN only maps to a specific WAL record.
4589 */
4591 *segno);
4592
4594
4595 /* No harm in resetting the offset even in case of failure */
4596 file->curOffset = 0;
4597
4598 if (*fd < 0 && errno == ENOENT)
4599 {
4600 *fd = -1;
4601 (*segno)++;
4602 continue;
4603 }
4604 else if (*fd < 0)
4605 ereport(ERROR,
4607 errmsg("could not open file \"%s\": %m",
4608 path)));
4609 }
4610
4611 /*
4612 * Read the statically sized part of a change which has information
4613 * about the total size. If we couldn't read a record, we're at the
4614 * end of this file.
4615 */
4617 readBytes = FileRead(file->vfd, rb->outbuf,
4620
4621 /* eof */
4622 if (readBytes == 0)
4623 {
4624 FileClose(*fd);
4625 *fd = -1;
4626 (*segno)++;
4627 continue;
4628 }
4629 else if (readBytes < 0)
4630 ereport(ERROR,
4632 errmsg("could not read from reorderbuffer spill file: %m")));
4633 else if (readBytes != sizeof(ReorderBufferDiskChange))
4634 ereport(ERROR,
4636 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4637 readBytes,
4638 (uint32) sizeof(ReorderBufferDiskChange))));
4639
4640 file->curOffset += readBytes;
4641
4642 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4643
4645 sizeof(ReorderBufferDiskChange) + ondisk->size);
4646 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4647
4648 readBytes = FileRead(file->vfd,
4649 rb->outbuf + sizeof(ReorderBufferDiskChange),
4650 ondisk->size - sizeof(ReorderBufferDiskChange),
4651 file->curOffset,
4653
4654 if (readBytes < 0)
4655 ereport(ERROR,
4657 errmsg("could not read from reorderbuffer spill file: %m")));
4658 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4659 ereport(ERROR,
4661 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4662 readBytes,
4663 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4664
4665 file->curOffset += readBytes;
4666
4667 /*
4668 * ok, read a full change from disk, now restore it into proper
4669 * in-memory format
4670 */
4671 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4672 restored++;
4673 }
4674
4675 return restored;
4676}

References Assert, ReorderBufferTXN::changes, CHECK_FOR_INTERRUPTS, cleanup(), TXNEntryFile::curOffset, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_is_empty(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), FileClose(), FileRead(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, max_changes_in_memory, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, PathNameOpenFile(), PG_BINARY, ReorderBufferFreeChange(), ReorderBufferRestoreChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, TXNEntryFile::vfd, wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit(), and ReorderBufferIterTXNNext().
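
Restoration proceeds record by record: read the fixed-size on-disk header, learn the record's total size from it, grow the read buffer if needed, read the remainder, and hand the complete record to the per-change restore routine; a zero-byte read means the end of the current segment file. A simplified stdio version of that loop, with a made-up DiskHeader and without the caller-supplied segment/offset resume state.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct DiskHeader
{
    size_t  total_size;             /* includes the header itself */
} DiskHeader;

/* Returns how many complete records were handed to restore_one(). */
static size_t
restore_from_file(const char *path, void (*restore_one) (const char *record))
{
    FILE       *f = fopen(path, "rb");
    char       *buf = NULL;
    size_t      bufsize = 0;
    size_t      restored = 0;

    if (f == NULL)
        return 0;                   /* treat a missing segment as empty */

    for (;;)
    {
        DiskHeader  hdr;

        /* fixed-size header first; a short read means end of this file */
        if (fread(&hdr, sizeof(hdr), 1, f) != 1)
            break;

        if (hdr.total_size > bufsize)
        {
            bufsize = hdr.total_size;
            buf = realloc(buf, bufsize);
        }

        memcpy(buf, &hdr, sizeof(hdr));
        if (hdr.total_size > sizeof(hdr) &&
            fread(buf + sizeof(hdr), hdr.total_size - sizeof(hdr), 1, f) != 1)
            break;                  /* truncated record: stop here */

        restore_one(buf);
        restored++;
    }

    free(buf);
    fclose(f);
    return restored;
}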

◆ ReorderBufferRestoreCleanup()

static void ReorderBufferRestoreCleanup ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn 
)
static

Definition at line 4854 of file reorderbuffer.c.

4855{
4856 XLogSegNo first;
4857 XLogSegNo cur;
4858 XLogSegNo last;
4859
4862
4865
4866 /* iterate over all possible filenames, and delete them */
4867 for (cur = first; cur <= last; cur++)
4868 {
4869 char path[MAXPGPATH];
4870
4872 if (unlink(path) != 0 && errno != ENOENT)
4873 ereport(ERROR,
4875 errmsg("could not remove file \"%s\": %m", path)));
4876 }
4877}

References Assert, cur, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferSerializedPath(), wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferCleanupTXN(), and ReorderBufferTruncateTXN().
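
Cleanup derives the first and last spill segment numbers from the transaction's first_lsn and final_lsn and unlinks every file name in that range, ignoring files that were never created. A small sketch of that unlink loop with an invented file-name scheme; the real naming lives in ReorderBufferSerializedPath.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Remove spill files "spill_<xid>_<segno>.snap" for every segment in
 * [first_segno, last_segno]; missing files (ENOENT) are not an error.
 */
static int
cleanup_spill_files(const char *dir, unsigned int xid,
                    unsigned long first_segno, unsigned long last_segno)
{
    unsigned long segno;

    for (segno = first_segno; segno <= last_segno; segno++)
    {
        char    path[1024];

        snprintf(path, sizeof(path), "%s/spill_%u_%lu.snap", dir, xid, segno);
        if (unlink(path) != 0 && errno != ENOENT)
            return -1;              /* real error: let the caller report it */
    }
    return 0;
}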

◆ ReorderBufferSaveTXNSnapshot()

static void ReorderBufferSaveTXNSnapshot ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
Snapshot  snapshot_now,
CommandId  command_id 
)
inlinestatic

Definition at line 2120 of file reorderbuffer.c.

2122{
2123 txn->command_id = command_id;
2124
2125 /* Avoid copying if it's already copied. */
2126 if (snapshot_now->copied)
2127 txn->snapshot_now = snapshot_now;
2128 else
2129 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2130 txn, command_id);
2131}

References ReorderBufferTXN::command_id, SnapshotData::copied, fb(), ReorderBufferCopySnap(), and ReorderBufferTXN::snapshot_now.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferSerializeChange()

static void ReorderBufferSerializeChange ( ReorderBuffer *  rb,
ReorderBufferTXN *  txn,
int  fd,
ReorderBufferChange *  change 
)
static

Definition at line 4092 of file reorderbuffer.c.

4094{
4097
4099
4100 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4101 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4102
4103 switch (change->action)
4104 {
4105 /* fall through these, they're all similar enough */
4110 {
4111 char *data;
4113 newtup;
4114 Size oldlen = 0;
4115 Size newlen = 0;
4116
4117 oldtup = change->data.tp.oldtuple;
4118 newtup = change->data.tp.newtuple;
4119
4120 if (oldtup)
4121 {
4122 sz += sizeof(HeapTupleData);
4123 oldlen = oldtup->t_len;
4124 sz += oldlen;
4125 }
4126
4127 if (newtup)
4128 {
4129 sz += sizeof(HeapTupleData);
4130 newlen = newtup->t_len;
4131 sz += newlen;
4132 }
4133
4134 /* make sure we have enough space */
4136
4137 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4138 /* might have been reallocated above */
4139 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4140
4141 if (oldlen)
4142 {
4143 memcpy(data, oldtup, sizeof(HeapTupleData));
4144 data += sizeof(HeapTupleData);
4145
4146 memcpy(data, oldtup->t_data, oldlen);
4147 data += oldlen;
4148 }
4149
4150 if (newlen)
4151 {
4152 memcpy(data, newtup, sizeof(HeapTupleData));
4153 data += sizeof(HeapTupleData);
4154
4155 memcpy(data, newtup->t_data, newlen);
4156 data += newlen;
4157 }
4158 break;
4159 }
4161 {
4162 char *data;
4163 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4164
4165 sz += prefix_size + change->data.msg.message_size +
4166 sizeof(Size) + sizeof(Size);
4168
4169 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4170
4171 /* might have been reallocated above */
4172 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4173
4174 /* write the prefix including the size */
4175 memcpy(data, &prefix_size, sizeof(Size));
4176 data += sizeof(Size);
4177 memcpy(data, change->data.msg.prefix,
4178 prefix_size);
4179 data += prefix_size;
4180
4181 /* write the message including the size */
4182 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4183 data += sizeof(Size);
4184 memcpy(data, change->data.msg.message,
4185 change->data.msg.message_size);
4186 data += change->data.msg.message_size;
4187
4188 break;
4189 }
4191 {
4192 char *data;
4194 change->data.inval.ninvalidations;
4195
4196 sz += inval_size;
4197
4199 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4200
4201 /* might have been reallocated above */
4202 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4204 data += inval_size;
4205
4206 break;
4207 }
4209 {
4210 Snapshot snap;
4211 char *data;
4212
4213 snap = change->data.snapshot;
4214
4215 sz += sizeof(SnapshotData) +
4216 sizeof(TransactionId) * snap->xcnt +
4217 sizeof(TransactionId) * snap->subxcnt;
4218
4219 /* make sure we have enough space */
4221 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4222 /* might have been reallocated above */
4223 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4224
4225 memcpy(data, snap, sizeof(SnapshotData));
4226 data += sizeof(SnapshotData);
4227
4228 if (snap->xcnt)
4229 {
4230 memcpy(data, snap->xip,
4231 sizeof(TransactionId) * snap->xcnt);
4232 data += sizeof(TransactionId) * snap->xcnt;
4233 }
4234
4235 if (snap->subxcnt)
4236 {
4237 memcpy(data, snap->subxip,
4238 sizeof(TransactionId) * snap->subxcnt);
4239 data += sizeof(TransactionId) * snap->subxcnt;
4240 }
4241 break;
4242 }
4244 {
4245 Size size;
4246 char *data;
4247
4248 /* account for the OIDs of truncated relations */
4249 size = sizeof(Oid) * change->data.truncate.nrelids;
4250 sz += size;
4251
4252 /* make sure we have enough space */
4254
4255 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4256 /* might have been reallocated above */
4257 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4258
4259 memcpy(data, change->data.truncate.relids, size);
4260 data += size;
4261
4262 break;
4263 }
4268 /* ReorderBufferChange contains everything important */
4269 break;
4270 }
4271
4272 ondisk->size = sz;
4273
4274 errno = 0;
4276 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4277 {
4278 int save_errno = errno;
4279
4281
4282 /* if write didn't set errno, assume problem is no disk space */
4284 ereport(ERROR,
4286 errmsg("could not write to data file for XID %u: %m",
4287 txn->xid)));
4288 }
4290
4291 /*
4292 * Keep the transaction's final_lsn up to date with each change we send to
4293 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4294 * only do this on commit and abort records, but that doesn't work if a
4295 * system crash leaves a transaction without its abort record).
4296 *
4297 * Make sure not to move it backwards.
4298 */
4299 if (txn->final_lsn < change->lsn)
4300 txn->final_lsn = change->lsn;
4301
4302 Assert(ondisk->change.action == change->action);
4303}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, CloseTransientFile(), ReorderBufferChange::data, data, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), ReorderBufferTXN::final_lsn, ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, pgstat_report_wait_end(), pgstat_report_wait_start(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, write, SnapshotData::xcnt, and ReorderBufferTXN::xid.

Referenced by ReorderBufferSerializeTXN().
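
The on-disk format written here is a flat record: a fixed-size header (a copy of the in-memory change plus the total record size) followed by the variable-length payload for that change type, emitted with a single write(); a short write that leaves errno unset is treated as out-of-disk-space. The following is a standalone C sketch of that serialization idiom under simplified assumptions; DiskRecord and write_record() are hypothetical names, not the ReorderBufferDiskChange layout itself.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* hypothetical on-disk header, analogous to ReorderBufferDiskChange */
typedef struct DiskRecord
{
	size_t		size;			/* total bytes, header included */
	int			action;			/* kind of change */
} DiskRecord;

/* serialize a header plus an opaque payload in one write() */
static int
write_record(int fd, int action, const void *payload, size_t payload_len)
{
	size_t		sz = sizeof(DiskRecord) + payload_len;
	char	   *buf = malloc(sz);
	DiskRecord *rec = (DiskRecord *) buf;
	int			rc = 0;

	if (buf == NULL)
		return -1;

	rec->size = sz;
	rec->action = action;
	memcpy(buf + sizeof(DiskRecord), payload, payload_len);

	errno = 0;
	if (write(fd, buf, sz) != (ssize_t) sz)
	{
		/* if write didn't set errno, assume the problem is no disk space */
		if (errno == 0)
			errno = ENOSPC;
		perror("write_record");
		rc = -1;
	}
	free(buf);
	return rc;
}

int
main(void)
{
	int			fd = open("record.tmp", O_WRONLY | O_CREAT | O_TRUNC, 0600);
	const char	msg[] = "hello";

	if (fd < 0 || write_record(fd, 1, msg, sizeof(msg)) != 0)
		return 1;
	close(fd);
	return 0;
}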

◆ ReorderBufferSerializedPath()

◆ ReorderBufferSerializeReserve()

static void ReorderBufferSerializeReserve ( ReorderBuffer *rb,
Size  sz 
)
static

Definition at line 3772 of file reorderbuffer.c.

3773{
3774 if (!rb->outbufsize)
3775 {
3776 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3777 rb->outbufsize = sz;
3778 }
3779 else if (rb->outbufsize < sz)
3780 {
3781 rb->outbuf = repalloc(rb->outbuf, sz);
3782 rb->outbufsize = sz;
3783 }
3784}

References fb(), MemoryContextAlloc(), and repalloc().

Referenced by ReorderBufferRestoreChanges(), and ReorderBufferSerializeChange().
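
This is a grow-only scratch buffer: it allocates on first use, reallocates only when a larger size is requested, and never shrinks, so repeated serializations reuse the same allocation. A minimal standalone C sketch of the same idea, assuming plain malloc/realloc instead of MemoryContextAlloc/repalloc; OutBuf and outbuf_reserve() are illustrative names only.

#include <stdlib.h>
#include <string.h>

/* hypothetical grow-only scratch buffer, analogous to rb->outbuf/outbufsize */
typedef struct OutBuf
{
	char	   *buf;
	size_t		size;
} OutBuf;

/* make sure at least sz bytes are available; the buffer never shrinks */
static void
outbuf_reserve(OutBuf *ob, size_t sz)
{
	if (ob->buf == NULL)
	{
		ob->buf = malloc(sz);
		ob->size = sz;
	}
	else if (ob->size < sz)
	{
		ob->buf = realloc(ob->buf, sz);
		ob->size = sz;
	}
	if (ob->buf == NULL)
		abort();				/* palloc/repalloc would ereport instead */
}

int
main(void)
{
	OutBuf		ob = {NULL, 0};

	outbuf_reserve(&ob, 128);
	memset(ob.buf, 0, 128);
	outbuf_reserve(&ob, 64);	/* no-op: already large enough */
	outbuf_reserve(&ob, 4096);	/* grows the existing allocation */
	free(ob.buf);
	return 0;
}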

◆ ReorderBufferSerializeTXN()

static void ReorderBufferSerializeTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 3997 of file reorderbuffer.c.

3998{
4001 int fd = -1;
4003 Size spilled = 0;
4004 Size size = txn->size;
4005
4006 elog(DEBUG2, "spill %u changes in XID %u to disk",
4007 (uint32) txn->nentries_mem, txn->xid);
4008
4009 /* do the same to all child TXs */
4011 {
4013
4016 }
4017
4018 /* serialize changestream */
4020 {
4021 ReorderBufferChange *change;
4022
4023 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4024
4025 /*
4026 * store in segment in which it belongs by start lsn, don't split over
4027 * multiple segments tho
4028 */
4029 if (fd == -1 ||
4031 {
4032 char path[MAXPGPATH];
4033
4034 if (fd != -1)
4036
4038
4039 /*
4040 * No need to care about TLIs here, only used during a single run,
4041 * so each LSN only maps to a specific WAL record.
4042 */
4044 curOpenSegNo);
4045
4046 /* open segment, create it if necessary */
4047 fd = OpenTransientFile(path,
4049
4050 if (fd < 0)
4051 ereport(ERROR,
4053 errmsg("could not open file \"%s\": %m", path)));
4054 }
4055
4056 ReorderBufferSerializeChange(rb, txn, fd, change);
4057 dlist_delete(&change->node);
4058 ReorderBufferFreeChange(rb, change, false);
4059
4060 spilled++;
4061 }
4062
4063 /* Update the memory counter */
4064 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4065
4066 /* update the statistics iff we have spilled anything */
4067 if (spilled)
4068 {
4069 rb->spillCount += 1;
4070 rb->spillBytes += size;
4071
4072 /* don't consider already serialized transactions */
4073 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4074
4075 /* update the decoding stats */
4077 }
4078
4079 Assert(spilled == txn->nentries_mem);
4081 txn->nentries_mem = 0;
4083
4084 if (fd != -1)
4086}

References Assert, ReorderBufferTXN::changes, CloseTransientFile(), DEBUG2, dlist_container, dlist_delete(), dlist_foreach, dlist_foreach_modify, dlist_is_empty(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), ReorderBufferChange::lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, OpenTransientFile(), PG_BINARY, RBTXN_IS_SERIALIZED, rbtxn_is_serialized, rbtxn_is_serialized_clear, ReorderBufferChangeMemoryUpdate(), ReorderBufferFreeChange(), ReorderBufferSerializeChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReorderBufferTXN::size, ReorderBufferTXN::subtxns, ReorderBufferTXN::txn_flags, UpdateDecodingStats(), wal_segment_size, ReorderBufferTXN::xid, XLByteInSeg, and XLByteToSeg.

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferIterTXNInit(), and ReorderBufferSerializeTXN().
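
The spill loop routes each change to the file of the WAL segment its LSN falls into, opening a new file lazily whenever the segment changes and never splitting a change across segments. A standalone C sketch of that routing, under made-up assumptions: SEG_SIZE, the file-name pattern and spill_changes() are illustrative, not the names used by ReorderBufferSerializedPath().

#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

#define SEG_SIZE  (16 * 1024 * 1024)	/* stand-in for wal_segment_size */

/* write each (lsn, payload) pair into the file of the segment it falls in */
static void
spill_changes(uint32_t xid, const uint64_t *lsns, int nchanges)
{
	int			fd = -1;
	uint64_t	cur_seg = 0;

	for (int i = 0; i < nchanges; i++)
	{
		uint64_t	seg = lsns[i] / SEG_SIZE;	/* analogous to the XLByteInSeg test */

		/* open the next segment file lazily, closing the previous one */
		if (fd == -1 || seg != cur_seg)
		{
			char		path[256];

			if (fd != -1)
				close(fd);
			cur_seg = seg;
			snprintf(path, sizeof(path), "xid-%u-seg-%lu.spill",
					 xid, (unsigned long) cur_seg);
			fd = open(path, O_WRONLY | O_CREAT | O_APPEND, 0600);
			if (fd < 0)
				return;
		}
		/* a real implementation would serialize the change here */
		dprintf(fd, "change at lsn %lu\n", (unsigned long) lsns[i]);
	}
	if (fd != -1)
		close(fd);
}

int
main(void)
{
	uint64_t	lsns[] = {100, 200, SEG_SIZE + 5, SEG_SIZE + 6, 3UL * SEG_SIZE};

	spill_changes(42, lsns, 5);
	return 0;
}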

◆ ReorderBufferSetBaseSnapshot()

void ReorderBufferSetBaseSnapshot ( ReorderBuffer *rb,
TransactionId  xid,
XLogRecPtr  lsn,
Snapshot  snap 
)

Definition at line 3326 of file reorderbuffer.c.

3328{
3329 ReorderBufferTXN *txn;
3330 bool is_new;
3331
3332 Assert(snap != NULL);
3333
3334 /*
3335 * Fetch the transaction to operate on. If we know it's a subtransaction,
3336 * operate on its top-level transaction instead.
3337 */
3338 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3339 if (rbtxn_is_known_subxact(txn))
3340 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3341 NULL, InvalidXLogRecPtr, false);
3342 Assert(txn->base_snapshot == NULL);
3343
3344 txn->base_snapshot = snap;
3345 txn->base_snapshot_lsn = lsn;
3346 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3347
3348 AssertTXNLsnOrder(rb);
3349}

References Assert, AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_push_tail(), fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), and SnapBuildProcessChange().

◆ ReorderBufferSetRestartPoint()

void ReorderBufferSetRestartPoint ( ReorderBuffer *rb,
XLogRecPtr  ptr 
)

Definition at line 1087 of file reorderbuffer.c.

1088{
1089 rb->current_restart_decoding_lsn = ptr;
1090}

References fb().

Referenced by SnapBuildRestore(), and SnapBuildSerialize().

◆ ReorderBufferSkipPrepare()

void ReorderBufferSkipPrepare ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 2939 of file reorderbuffer.c.

2940{
2941 ReorderBufferTXN *txn;
2942
2943 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2944
2945 /* unknown transaction, nothing to do */
2946 if (txn == NULL)
2947 return;
2948
2949 /* txn must have been marked as a prepared transaction */
2950 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) == RBTXN_IS_PREPARED);
2951 txn->txn_flags |= RBTXN_SKIPPED_PREPARE;
2952}

References Assert, fb(), InvalidXLogRecPtr, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferStreamCommit()

static void ReorderBufferStreamCommit ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 1983 of file reorderbuffer.c.

1984{
1985 /* we should only call this for previously streamed transactions */
1987
1989
1990 if (rbtxn_is_prepared(txn))
1991 {
1992 /*
1993 * Note, we send stream prepare even if a concurrent abort is
1994 * detected. See DecodePrepare for more information.
1995 */
1997 rb->stream_prepare(rb, txn, txn->final_lsn);
1999
2000 /*
2001 * This is a PREPARED transaction, part of a two-phase commit. The
2002 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2003 * just truncate txn by removing changes and tuplecids.
2004 */
2005 ReorderBufferTruncateTXN(rb, txn, true);
2006 /* Reset the CheckXidAlive */
2008 }
2009 else
2010 {
2011 rb->stream_commit(rb, txn, txn->final_lsn);
2013 }
2014}

References Assert, CheckXidAlive, fb(), ReorderBufferTXN::final_lsn, InvalidTransactionId, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferCleanupTXN(), ReorderBufferStreamTXN(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferReplay().

◆ ReorderBufferStreamTXN()

static void ReorderBufferStreamTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 4342 of file reorderbuffer.c.

4343{
4344 Snapshot snapshot_now;
4345 CommandId command_id;
4346 Size stream_bytes;
4347 bool txn_is_streamed;
4348
4349 /* We can never reach here for a subtransaction. */
4350 Assert(rbtxn_is_toptxn(txn));
4351
4352 /*
4353 * We can't make any assumptions about base snapshot here, similar to what
4354 * ReorderBufferCommit() does. That relies on base_snapshot getting
4355 * transferred from subxact in ReorderBufferCommitChild(), but that was
4356 * not yet called as the transaction is in-progress.
4357 *
4358 * So just walk the subxacts and use the same logic here. But we only need
4359 * to do that once, when the transaction is streamed for the first time.
4360 * After that we need to reuse the snapshot from the previous run.
4361 *
4362 * Unlike DecodeCommit which adds xids of all the subtransactions in
4363 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4364 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4365 * allows the catalog changes made in subtransactions decoded till now to
4366 * be visible.
4367 */
4368 if (txn->snapshot_now == NULL)
4369 {
4371
4372 /* make sure this transaction is streamed for the first time */
4374
4375 /* at the beginning we should have invalid command ID */
4377
4379 {
4381
4384 }
4385
4386 /*
4387 * If this transaction has no snapshot, it didn't make any changes to
4388 * the database till now, so there's nothing to decode.
4389 */
4390 if (txn->base_snapshot == NULL)
4391 {
4392 Assert(txn->ninvalidations == 0);
4393 return;
4394 }
4395
4396 command_id = FirstCommandId;
4397 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4398 txn, command_id);
4399 }
4400 else
4401 {
4402 /* the transaction must have been already streamed */
4404
4405 /*
4406 * Nah, we already have snapshot from the previous streaming run. We
4407 * assume new subxacts can't move the LSN backwards, and so can't beat
4408 * the LSN condition in the previous branch (so no need to walk
4409 * through subxacts again). In fact, we must not do that as we may be
4410 * using snapshot half-way through the subxact.
4411 */
4412 command_id = txn->command_id;
4413
4414 /*
4415 * We can't use txn->snapshot_now directly because after the last
4416 * streaming run, we might have got some new sub-transactions. So we
4417 * need to add them to the snapshot.
4418 */
4419 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4420 txn, command_id);
4421
4422 /* Free the previously copied snapshot. */
4423 Assert(txn->snapshot_now->copied);
4425 txn->snapshot_now = NULL;
4426 }
4427
4428 /*
4429 * Remember this information to be used later to update stats. We can't
4430 * update the stats here as an error while processing the changes would
4431 * lead to the accumulation of stats even though we haven't streamed all
4432 * the changes.
4433 */
4435 stream_bytes = txn->total_size;
4436
4437 /* Process and send the changes to output plugin. */
4438 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4439 command_id, true);
4440
4441 rb->streamCount += 1;
4442 rb->streamBytes += stream_bytes;
4443
4444 /* Don't consider already streamed transaction. */
4445 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4446
4447 /* update the decoding stats */
4449
4451 Assert(txn->nentries == 0);
4452 Assert(txn->nentries_mem == 0);
4453}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::changes, ReorderBufferTXN::command_id, SnapshotData::copied, dlist_container, dlist_foreach, dlist_is_empty(), fb(), FirstCommandId, InvalidCommandId, InvalidXLogRecPtr, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, rbtxn_is_toptxn, ReorderBufferCopySnap(), ReorderBufferFreeSnap(), ReorderBufferProcessTXN(), ReorderBufferTransferSnapToParent(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferProcessPartialChange(), and ReorderBufferStreamCommit().

◆ ReorderBufferToastAppendChunk()

static void ReorderBufferToastAppendChunk ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change 
)
static

Definition at line 4995 of file reorderbuffer.c.

4997{
5000 bool found;
5002 bool isnull;
5003 Pointer chunk;
5004 TupleDesc desc = RelationGetDescr(relation);
5005 Oid chunk_id;
5007
5008 if (txn->toast_hash == NULL)
5010
5011 Assert(IsToastRelation(relation));
5012
5013 newtup = change->data.tp.newtuple;
5014 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5015 Assert(!isnull);
5016 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5017 Assert(!isnull);
5018
5020 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5021
5022 if (!found)
5023 {
5024 Assert(ent->chunk_id == chunk_id);
5025 ent->num_chunks = 0;
5026 ent->last_chunk_seq = 0;
5027 ent->size = 0;
5028 ent->reconstructed = NULL;
5029 dlist_init(&ent->chunks);
5030
5031 if (chunk_seq != 0)
5032 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5033 chunk_seq, chunk_id);
5034 }
5035 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5036 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5037 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5038
5039 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5040 Assert(!isnull);
5041
5042 /* calculate size so we can allocate the right size at once later */
5045 else if (VARATT_IS_SHORT(chunk))
5046 /* could happen due to heap_form_tuple doing its thing */
5048 else
5049 elog(ERROR, "unexpected type of toast chunk");
5050
5051 ent->size += chunksize;
5052 ent->last_chunk_seq = chunk_seq;
5053 ent->num_chunks++;
5054 dlist_push_tail(&ent->chunks, &change->node);
5055}

References Assert, ReorderBufferChange::data, DatumGetInt32(), DatumGetObjectId(), DatumGetPointer(), dlist_init(), dlist_push_tail(), elog, ERROR, fastgetattr(), fb(), HASH_ENTER, hash_search(), IsToastRelation(), ReorderBufferChange::newtuple, ReorderBufferChange::node, RelationGetDescr, ReorderBufferToastInitHash(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, VARATT_IS_EXTENDED(), VARATT_IS_SHORT(), VARHDRSZ, VARHDRSZ_SHORT, VARSIZE(), and VARSIZE_SHORT().

Referenced by ReorderBufferProcessTXN().
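
The chunk collector keys entries by chunk_id, insists that chunk_seq values arrive strictly in order (0, 1, 2, ...), and accumulates the total size so the value can later be reassembled in a single allocation. A standalone C sketch of that bookkeeping, with ToastEnt and toast_append_chunk() as made-up names and a single accumulator instead of the per-transaction hash table.

#include <stdio.h>
#include <stdlib.h>

/* hypothetical per-value accumulator, analogous to ReorderBufferToastEnt */
typedef struct ToastEnt
{
	unsigned	chunk_id;		/* which toasted value the chunks belong to */
	int			last_chunk_seq; /* last sequence number seen */
	size_t		size;			/* total payload bytes accumulated so far */
	int			num_chunks;
} ToastEnt;

/* append one chunk, insisting that sequence numbers arrive in order */
static void
toast_append_chunk(ToastEnt *ent, unsigned chunk_id, int chunk_seq,
				   size_t chunk_len)
{
	if (ent->num_chunks == 0)
	{
		ent->chunk_id = chunk_id;
		if (chunk_seq != 0)
		{
			fprintf(stderr, "got seq %d for chunk %u instead of seq 0\n",
					chunk_seq, chunk_id);
			exit(1);
		}
	}
	else if (chunk_seq != ent->last_chunk_seq + 1)
	{
		fprintf(stderr, "got seq %d for chunk %u instead of seq %d\n",
				chunk_seq, chunk_id, ent->last_chunk_seq + 1);
		exit(1);
	}

	ent->size += chunk_len;
	ent->last_chunk_seq = chunk_seq;
	ent->num_chunks++;
}

int
main(void)
{
	ToastEnt	ent = {0};

	toast_append_chunk(&ent, 7, 0, 1996);
	toast_append_chunk(&ent, 7, 1, 1996);
	toast_append_chunk(&ent, 7, 2, 100);
	printf("%d chunks, %zu bytes total\n", ent.num_chunks, ent.size);
	return 0;
}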

◆ ReorderBufferToastInitHash()

static void ReorderBufferToastInitHash ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 4975 of file reorderbuffer.c.

4976{
4978
4979 Assert(txn->toast_hash == NULL);
4980
4981 hash_ctl.keysize = sizeof(Oid);
4982 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4983 hash_ctl.hcxt = rb->context;
4984 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4986}

References Assert, fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferToastAppendChunk().

◆ ReorderBufferToastReplace()

static void ReorderBufferToastReplace ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change 
)
static

Definition at line 5078 of file reorderbuffer.c.

5080{
5081 TupleDesc desc;
5082 int natt;
5083 Datum *attrs;
5084 bool *isnull;
5085 bool *free;
5087 Relation toast_rel;
5089 MemoryContext oldcontext;
5091 Size old_size;
5092
5093 /* no toast tuples changed */
5094 if (txn->toast_hash == NULL)
5095 return;
5096
5097 /*
5098 * We're going to modify the size of the change. So, to make sure the
5099 * accounting is correct we record the current change size and then after
5100 * re-computing the change we'll subtract the recorded size and then
5101 * re-add the new change size at the end. We don't immediately subtract
5102 * the old size because if there is any error before we add the new size,
5103 * we will release the changes and that will update the accounting info
5104 * (subtracting the size from the counters). And we don't want to
5105 * underflow there.
5106 */
5108
5109 oldcontext = MemoryContextSwitchTo(rb->context);
5110
5111 /* we should only have toast tuples in an INSERT or UPDATE */
5112 Assert(change->data.tp.newtuple);
5113
5114 desc = RelationGetDescr(relation);
5115
5116 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5117 if (!RelationIsValid(toast_rel))
5118 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5119 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5120
5121 toast_desc = RelationGetDescr(toast_rel);
5122
5123 /* should we allocate from stack instead? */
5124 attrs = palloc0_array(Datum, desc->natts);
5125 isnull = palloc0_array(bool, desc->natts);
5126 free = palloc0_array(bool, desc->natts);
5127
5128 newtup = change->data.tp.newtuple;
5129
5130 heap_deform_tuple(newtup, desc, attrs, isnull);
5131
5132 for (natt = 0; natt < desc->natts; natt++)
5133 {
5136 struct varlena *varlena;
5137
5138 /* va_rawsize is the size of the original datum -- including header */
5139 struct varatt_external toast_pointer;
5141 struct varlena *new_datum = NULL;
5142 struct varlena *reconstructed;
5143 dlist_iter it;
5144 Size data_done = 0;
5145
5146 if (attr->attisdropped)
5147 continue;
5148
5149 /* not a varlena datatype */
5150 if (attr->attlen != -1)
5151 continue;
5152
5153 /* no data */
5154 if (isnull[natt])
5155 continue;
5156
5157 /* ok, we know we have a toast datum */
5158 varlena = (struct varlena *) DatumGetPointer(attrs[natt]);
5159
5160 /* no need to do anything if the tuple isn't external */
5162 continue;
5163
5164 VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
5165
5166 /*
5167 * Check whether the toast tuple changed, replace if so.
5168 */
5171 &toast_pointer.va_valueid,
5172 HASH_FIND,
5173 NULL);
5174 if (ent == NULL)
5175 continue;
5176
5177 new_datum =
5179
5180 free[natt] = true;
5181
5182 reconstructed = palloc0(toast_pointer.va_rawsize);
5183
5184 ent->reconstructed = reconstructed;
5185
5186 /* stitch toast tuple back together from its parts */
5187 dlist_foreach(it, &ent->chunks)
5188 {
5189 bool cisnull;
5192 Pointer chunk;
5193
5195 ctup = cchange->data.tp.newtuple;
5197
5198 Assert(!cisnull);
5201
5202 memcpy(VARDATA(reconstructed) + data_done,
5203 VARDATA(chunk),
5206 }
5207 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5208
5209 /* make sure its marked as compressed or not */
5210 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5211 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5212 else
5213 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5214
5216 redirect_pointer.pointer = reconstructed;
5217
5220 sizeof(redirect_pointer));
5221
5223 }
5224
5225 /*
5226 * Build tuple in separate memory & copy tuple back into the tuplebuf
5227 * passed to the output plugin. We can't directly heap_fill_tuple() into
5228 * the tuplebuf because attrs[] will point back into the current content.
5229 */
5230 tmphtup = heap_form_tuple(desc, attrs, isnull);
5231 Assert(newtup->t_len <= MaxHeapTupleSize);
5232 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5233
5234 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5235 newtup->t_len = tmphtup->t_len;
5236
5237 /*
5238 * free resources we won't further need, more persistent stuff will be
5239 * free'd in ReorderBufferToastReset().
5240 */
5241 RelationClose(toast_rel);
5242 pfree(tmphtup);
5243 for (natt = 0; natt < desc->natts; natt++)
5244 {
5245 if (free[natt])
5247 }
5248 pfree(attrs);
5249 pfree(free);
5250 pfree(isnull);
5251
5252 MemoryContextSwitchTo(oldcontext);
5253
5254 /* subtract the old change size */
5256 /* now add the change back, with the correct size */
5258 ReorderBufferChangeSize(change));
5259}

References Assert, CompactAttribute::attisdropped, CompactAttribute::attlen, ReorderBufferChange::data, DatumGetPointer(), dlist_container, dlist_foreach, elog, ERROR, fastgetattr(), fb(), free, HASH_FIND, hash_search(), heap_deform_tuple(), heap_form_tuple(), HEAPTUPLESIZE, INDIRECT_POINTER_SIZE, MaxHeapTupleSize, MemoryContextSwitchTo(), TupleDescData::natts, ReorderBufferChange::newtuple, palloc0(), palloc0_array, pfree(), PointerGetDatum(), RelationData::rd_rel, RelationClose(), RelationGetDescr, RelationGetRelationName, RelationIdGetRelation(), RelationIsValid, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SET_VARSIZE(), SET_VARSIZE_COMPRESSED(), SET_VARTAG_EXTERNAL(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, TupleDescCompactAttr(), varatt_external::va_rawsize, varatt_external::va_valueid, VARATT_EXTERNAL_GET_EXTSIZE(), VARATT_EXTERNAL_GET_POINTER, VARATT_EXTERNAL_IS_COMPRESSED(), VARATT_IS_EXTERNAL(), VARATT_IS_SHORT(), VARDATA(), VARDATA_EXTERNAL(), VARHDRSZ, VARSIZE(), and VARTAG_INDIRECT.

Referenced by ReorderBufferProcessTXN().
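
The core of the stitching step is simple: allocate one buffer sized from the toast pointer's advertised raw size, then memcpy each queued chunk at the running offset and check that the total matches. A standalone C sketch of just that reassembly loop, with reassemble() as a hypothetical name and ordinary byte arrays standing in for the varlena chunks.

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* stitch a value back together from its ordered chunks, analogous to the
 * dlist_foreach() loop over ent->chunks above */
static char *
reassemble(const char *const *chunks, const size_t *lens, int nchunks,
		   size_t expected_size)
{
	char	   *reconstructed = malloc(expected_size);
	size_t		data_done = 0;

	if (reconstructed == NULL)
		return NULL;

	for (int i = 0; i < nchunks; i++)
	{
		memcpy(reconstructed + data_done, chunks[i], lens[i]);
		data_done += lens[i];
	}
	/* the chunks must add up exactly to the advertised external size */
	assert(data_done == expected_size);
	return reconstructed;
}

int
main(void)
{
	const char *chunks[] = {"he", "llo", "!"};
	size_t		lens[] = {2, 3, 1};
	char	   *v = reassemble(chunks, lens, 3, 6);

	free(v);
	return 0;
}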

◆ ReorderBufferToastReset()

static void ReorderBufferToastReset ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 5265 of file reorderbuffer.c.

5266{
5269
5270 if (txn->toast_hash == NULL)
5271 return;
5272
5273 /* sequentially walk over the hash and free everything */
5276 {
5278
5279 if (ent->reconstructed != NULL)
5280 pfree(ent->reconstructed);
5281
5282 dlist_foreach_modify(it, &ent->chunks)
5283 {
5284 ReorderBufferChange *change =
5286
5287 dlist_delete(&change->node);
5288 ReorderBufferFreeChange(rb, change, true);
5289 }
5290 }
5291
5293 txn->toast_hash = NULL;
5294}

References dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), hash_seq_init(), hash_seq_search(), ReorderBufferChange::node, pfree(), ReorderBufferFreeChange(), and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferFreeTXN(), ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferTransferSnapToParent()

static void ReorderBufferTransferSnapToParent ( ReorderBufferTXN *txn,
ReorderBufferTXN *subtxn 
)
static

Definition at line 1165 of file reorderbuffer.c.

1167{
1168 Assert(subtxn->toplevel_xid == txn->xid);
1169
1170 if (subtxn->base_snapshot != NULL)
1171 {
1172 if (txn->base_snapshot == NULL ||
1173 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1174 {
1175 /*
1176 * If the toplevel transaction already has a base snapshot but
1177 * it's newer than the subxact's, purge it.
1178 */
1179 if (txn->base_snapshot != NULL)
1180 {
1181 SnapBuildSnapDecRefcount(txn->base_snapshot);
1182 dlist_delete(&txn->base_snapshot_node);
1183 }
1184
1185 /*
1186 * The snapshot is now the top transaction's; transfer it, and
1187 * adjust the list position of the top transaction in the list by
1188 * moving it to where the subtransaction is.
1189 */
1190 txn->base_snapshot = subtxn->base_snapshot;
1191 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1192 dlist_insert_before(&subtxn->base_snapshot_node,
1193 &txn->base_snapshot_node);
1194
1195 /*
1196 * The subtransaction doesn't have a snapshot anymore (so it
1197 * mustn't be in the list.)
1198 */
1199 subtxn->base_snapshot = NULL;
1200 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1201 dlist_delete(&subtxn->base_snapshot_node);
1202 }
1203 else
1204 {
1205 /* Base snap of toplevel is fine, so subxact's is not needed */
1206 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1207 dlist_delete(&subtxn->base_snapshot_node);
1208 subtxn->base_snapshot = NULL;
1209 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1210 }
1211 }
1212}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_delete(), dlist_insert_before(), fb(), InvalidXLogRecPtr, SnapBuildSnapDecRefcount(), and ReorderBufferTXN::xid.

Referenced by ReorderBufferAssignChild(), and ReorderBufferStreamTXN().

◆ ReorderBufferTruncateTXN()

static void ReorderBufferTruncateTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
bool  txn_prepared 
)
static

Definition at line 1656 of file reorderbuffer.c.

1657{
1658 dlist_mutable_iter iter;
1659 Size mem_freed = 0;
1660
1661 /* cleanup subtransactions & their changes */
1662 dlist_foreach_modify(iter, &txn->subtxns)
1663 {
1665
1667
1668 /*
1669 * Subtransactions are always associated to the toplevel TXN, even if
1670 * they originally were happening inside another subtxn, so we won't
1671 * ever recurse more than one level deep here.
1672 */
1674 Assert(subtxn->nsubtxns == 0);
1675
1678 }
1679
1680 /* cleanup changes in the txn */
1681 dlist_foreach_modify(iter, &txn->changes)
1682 {
1683 ReorderBufferChange *change;
1684
1685 change = dlist_container(ReorderBufferChange, node, iter.cur);
1686
1687 /* Check we're not mixing changes from different transactions. */
1688 Assert(change->txn == txn);
1689
1690 /* remove the change from its containing list */
1691 dlist_delete(&change->node);
1692
1693 /*
1694 * Instead of updating the memory counter for individual changes, we
1695 * sum up the size of memory to free so we can update the memory
1696 * counter all together below. This saves costs of maintaining the
1697 * max-heap.
1698 */
1700
1701 ReorderBufferFreeChange(rb, change, false);
1702 }
1703
1704 /* Update the memory counter */
1706
1707 if (txn_prepared)
1708 {
1709 /*
1710 * If this is a prepared txn, cleanup the tuplecids we stored for
1711 * decoding catalog snapshot access. They are always stored in the
1712 * toplevel transaction.
1713 */
1714 dlist_foreach_modify(iter, &txn->tuplecids)
1715 {
1716 ReorderBufferChange *change;
1717
1718 change = dlist_container(ReorderBufferChange, node, iter.cur);
1719
1720 /* Check we're not mixing changes from different transactions. */
1721 Assert(change->txn == txn);
1723
1724 /* Remove the change from its containing list. */
1725 dlist_delete(&change->node);
1726
1727 ReorderBufferFreeChange(rb, change, true);
1728 }
1729 }
1730
1731 /*
1732 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1733 * memory. We could also keep the hash table and update it with new ctid
1734 * values, but this seems simpler and good enough for now.
1735 */
1736 if (txn->tuplecid_hash != NULL)
1737 {
1739 txn->tuplecid_hash = NULL;
1740 }
1741
1742 /* If this txn is serialized then clean the disk space. */
1743 if (rbtxn_is_serialized(txn))
1744 {
1747
1748 /*
1749 * We set this flag to indicate if the transaction is ever serialized.
1750 * We need this to accurately update the stats as otherwise the same
1751 * transaction can be counted as serialized multiple times.
1752 */
1754 }
1755
1756 /* also reset the number of entries in the transaction */
1757 txn->nentries_mem = 0;
1758 txn->nentries = 0;
1759}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_is_known_subxact, rbtxn_is_serialized, RBTXN_IS_SERIALIZED_CLEAR, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeChange(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferRestoreCleanup(), ReorderBufferTruncateTXN(), ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecid_hash, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferProcessTXN(), ReorderBufferResetTXN(), ReorderBufferStreamCommit(), and ReorderBufferTruncateTXN().

◆ ReorderBufferTXNByXid()

static ReorderBufferTXN * ReorderBufferTXNByXid ( ReorderBuffer *rb,
TransactionId xid,
bool create,
bool *is_new,
XLogRecPtr  lsn,
bool  create_as_top 
)
static

Definition at line 653 of file reorderbuffer.c.

655{
656 ReorderBufferTXN *txn;
658 bool found;
659
661
662 /*
663 * Check the one-entry lookup cache first
664 */
665 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
666 rb->by_txn_last_xid == xid)
667 {
668 txn = rb->by_txn_last_txn;
669
670 if (txn != NULL)
671 {
672 /* found it, and it's valid */
673 if (is_new)
674 *is_new = false;
675 return txn;
676 }
677
678 /*
679 * cached as non-existent, and asked not to create? Then nothing else
680 * to do.
681 */
682 if (!create)
683 return NULL;
684 /* otherwise fall through to create it */
685 }
686
687 /*
688 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
689 * create an entry.
690 */
691
692 /* search the lookup table */
694 hash_search(rb->by_txn,
695 &xid,
696 create ? HASH_ENTER : HASH_FIND,
697 &found);
698 if (found)
699 txn = ent->txn;
700 else if (create)
701 {
702 /* initialize the new entry, if creation was requested */
703 Assert(ent != NULL);
705
707 ent->txn->xid = xid;
708 txn = ent->txn;
709 txn->first_lsn = lsn;
710 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
711
712 if (create_as_top)
713 {
714 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
716 }
717 }
718 else
719 txn = NULL; /* not found and not asked to create */
720
721 /* update cache */
722 rb->by_txn_last_xid = xid;
723 rb->by_txn_last_txn = txn;
724
725 if (is_new)
726 *is_new = !found;
727
728 Assert(!create || txn != NULL);
729 return txn;
730}

References Assert, AssertTXNLsnOrder(), dlist_push_tail(), fb(), ReorderBufferTXN::first_lsn, HASH_ENTER, HASH_FIND, hash_search(), ReorderBufferTXN::node, ReorderBufferAllocTXN(), ReorderBufferTXN::restart_decoding_lsn, TransactionIdIsValid, and XLogRecPtrIsValid.

Referenced by ReorderBufferAbort(), ReorderBufferAddDistributedInvalidations(), ReorderBufferAddInvalidations(), ReorderBufferAddNewTupleCids(), ReorderBufferAssignChild(), ReorderBufferCommit(), ReorderBufferCommitChild(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferGetInvalidations(), ReorderBufferInvalidate(), ReorderBufferPrepare(), ReorderBufferProcessXid(), ReorderBufferQueueChange(), ReorderBufferQueueMessage(), ReorderBufferRememberPrepareInfo(), ReorderBufferSetBaseSnapshot(), ReorderBufferSkipPrepare(), ReorderBufferXidHasBaseSnapshot(), ReorderBufferXidHasCatalogChanges(), and ReorderBufferXidSetCatalogChanges().
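
The interesting part of this lookup is the one-entry cache kept in front of the hash table: the last xid and its result are remembered, including the "does not exist" case, and only a cached miss combined with create forces the slow path. A standalone C sketch of that caching pattern, using a toy linear-scan array instead of the by_txn hash table; Txn, txn_by_xid(), last_xid and last_txn are illustrative names only.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int TransactionId;

/* hypothetical transaction entry; the real code stores these in rb->by_txn */
typedef struct Txn
{
	TransactionId xid;
} Txn;

#define MAX_TXNS 64
static Txn	txns[MAX_TXNS];
static int	ntxns = 0;

/* one-entry cache in front of the lookup, analogous to by_txn_last_xid and
 * by_txn_last_txn; negative results ("no such xid") are cached too */
static TransactionId last_xid = 0;
static Txn *last_txn = NULL;

static Txn *
txn_by_xid(TransactionId xid, bool create)
{
	Txn		   *txn = NULL;

	/* fast path: same xid as last call, and either found or not asked to create */
	if (last_xid == xid && (last_txn != NULL || !create))
		return last_txn;

	/* slow path: scan (the real code consults a hash table) */
	for (int i = 0; i < ntxns; i++)
	{
		if (txns[i].xid == xid)
		{
			txn = &txns[i];
			break;
		}
	}

	if (txn == NULL && create && ntxns < MAX_TXNS)
	{
		txn = &txns[ntxns++];
		txn->xid = xid;
	}

	/* update the cache with hit or miss alike */
	last_xid = xid;
	last_txn = txn;
	return txn;
}

int
main(void)
{
	printf("%p\n", (void *) txn_by_xid(7, false));	/* miss, cached as NULL */
	printf("%p\n", (void *) txn_by_xid(7, true));	/* cached miss, so create */
	printf("%p\n", (void *) txn_by_xid(7, false));	/* served from the cache */
	return 0;
}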

◆ ReorderBufferTXNSizeCompare()

static int ReorderBufferTXNSizeCompare ( const pairingheap_node *a,
const pairingheap_node *b,
void *arg 
)
static

Definition at line 3789 of file reorderbuffer.c.

3790{
3793
3794 if (ta->size < tb->size)
3795 return -1;
3796 if (ta->size > tb->size)
3797 return 1;
3798 return 0;
3799}

References a, b, fb(), and pairingheap_const_container.

Referenced by ReorderBufferAllocate().
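
Note the explicit three-way comparison: returning the difference of two Size values would wrap for unsigned types, so the comparator returns -1/0/1 instead. A standalone C sketch of the same comparator shape, wired into qsort() rather than the pairing heap it is registered with in ReorderBufferAllocate(); Txn and txn_size_cmp() are made-up names.

#include <stdio.h>
#include <stdlib.h>

typedef struct Txn
{
	unsigned	xid;
	size_t		size;			/* bytes of queued changes */
} Txn;

/* three-way compare on size; returning (a->size - b->size) would wrap for
 * size_t, so compare explicitly as the function above does */
static int
txn_size_cmp(const void *pa, const void *pb)
{
	const Txn  *ta = pa;
	const Txn  *tb = pb;

	if (ta->size < tb->size)
		return -1;
	if (ta->size > tb->size)
		return 1;
	return 0;
}

int
main(void)
{
	Txn			txns[] = {{10, 4096}, {11, 128}, {12, 65536}};

	qsort(txns, 3, sizeof(Txn), txn_size_cmp);
	for (int i = 0; i < 3; i++)
		printf("xid %u: %zu bytes\n", txns[i].xid, txns[i].size);
	return 0;
}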

◆ ReorderBufferXidHasBaseSnapshot()

bool ReorderBufferXidHasBaseSnapshot ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 3742 of file reorderbuffer.c.

3743{
3744 ReorderBufferTXN *txn;
3745
3746 txn = ReorderBufferTXNByXid(rb, xid, false,
3747 NULL, InvalidXLogRecPtr, false);
3748
3749 /* transaction isn't known yet, ergo no snapshot */
3750 if (txn == NULL)
3751 return false;
3752
3753 /* a known subtxn? operate on top-level txn instead */
3754 if (rbtxn_is_known_subxact(txn))
3755 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3756 NULL, InvalidXLogRecPtr, false);
3757
3758 return txn->base_snapshot != NULL;
3759}

References ReorderBufferTXN::base_snapshot, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), SnapBuildDistributeSnapshotAndInval(), and SnapBuildProcessChange().

◆ ReorderBufferXidHasCatalogChanges()

bool ReorderBufferXidHasCatalogChanges ( ReorderBuffer *rb,
TransactionId  xid 
)

Definition at line 3725 of file reorderbuffer.c.

3726{
3727 ReorderBufferTXN *txn;
3728
3729 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3730 false);
3731 if (txn == NULL)
3732 return false;
3733
3734 return rbtxn_has_catalog_changes(txn);
3735}

References fb(), InvalidXLogRecPtr, rbtxn_has_catalog_changes, and ReorderBufferTXNByXid().

Referenced by SnapBuildXidHasCatalogChanges().

◆ ReorderBufferXidSetCatalogChanges()

void ReorderBufferXidSetCatalogChanges ( ReorderBuffer *rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3653 of file reorderbuffer.c.

3655{
3656 ReorderBufferTXN *txn;
3657
3658 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3659
3660 if (!rbtxn_has_catalog_changes(txn))
3661 {
3662 txn->txn_flags |= RBTXN_HAS_CATALOG_CHANGES;
3663 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3664 }
3665
3666 /*
3667 * Mark top-level transaction as having catalog changes too if one of its
3668 * children has so that the ReorderBufferBuildTupleCidHash can
3669 * conveniently check just top-level transaction and decide whether to
3670 * build the hash table or not.
3671 */
3672 if (rbtxn_is_subtxn(txn))
3673 {
3674 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3675
3676 if (!rbtxn_has_catalog_changes(toptxn))
3677 {
3678 toptxn->txn_flags |= RBTXN_HAS_CATALOG_CHANGES;
3679 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3680 }
3681 }
3682}

References ReorderBufferTXN::catchange_node, dclist_push_tail(), fb(), rbtxn_get_toptxn, RBTXN_HAS_CATALOG_CHANGES, rbtxn_has_catalog_changes, rbtxn_is_subtxn, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildProcessNewCid(), and xact_decode().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( HTAB *tuplecid_data,
Snapshot snapshot,
HeapTuple htup,
Buffer buffer,
CommandId *cmin,
CommandId *cmax 
)

Definition at line 5557 of file reorderbuffer.c.

5561{
5564 ForkNumber forkno;
5565 BlockNumber blockno;
5566 bool updated_mapping = false;
5567
5568 /*
5569 * Return unresolved if tuplecid_data is not valid. That's because when
5570 * streaming in-progress transactions we may run into tuples with the CID
5571 * before actually decoding them. Think e.g. about INSERT followed by
5572 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5573 * INSERT. So in such cases, we assume the CID is from the future
5574 * command.
5575 */
5576 if (tuplecid_data == NULL)
5577 return false;
5578
5579 /* be careful about padding */
5580 memset(&key, 0, sizeof(key));
5581
5582 Assert(!BufferIsLocal(buffer));
5583
5584 /*
5585 * get relfilelocator from the buffer, no convenient way to access it
5586 * other than that.
5587 */
5588 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5589
5590 /* tuples can only be in the main fork */
5591 Assert(forkno == MAIN_FORKNUM);
5592 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5593
5594 ItemPointerCopy(&htup->t_self,
5595 &key.tid);
5596
5597restart:
5600
5601 /*
5602 * failed to find a mapping, check whether the table was rewritten and
5603 * apply mapping if so, but only do that once - there can be no new
5604 * mappings while we are in here since we have to hold a lock on the
5605 * relation.
5606 */
5607 if (ent == NULL && !updated_mapping)
5608 {
5610 /* now check but don't update for a mapping again */
5611 updated_mapping = true;
5612 goto restart;
5613 }
5614 else if (ent == NULL)
5615 return false;
5616
5617 if (cmin)
5618 *cmin = ent->cmin;
5619 if (cmax)
5620 *cmax = ent->cmax;
5621 return true;
5622}

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ SetupCheckXidLive()

static void SetupCheckXidLive ( TransactionId  xid)
inline static

Definition at line 2049 of file reorderbuffer.c.

2050{
2051 /*
2052 * If the input transaction id is already set as a CheckXidAlive then
2053 * nothing to do.
2054 */
2055 if (TransactionIdEquals(CheckXidAlive, xid))
2056 return;
2057
2058 /*
2059 * setup CheckXidAlive if it's not committed yet. We don't check if the
2060 * xid is aborted. That will happen during catalog access.
2061 */
2062 if (!TransactionIdDidCommit(xid))
2063 CheckXidAlive = xid;
2064 else
2064 else
2065 CheckXidAlive = InvalidTransactionId;
2066}

References CheckXidAlive, InvalidTransactionId, TransactionIdDidCommit(), and TransactionIdEquals.

Referenced by ReorderBufferProcessTXN().

◆ StartupReorderBuffer()

void StartupReorderBuffer ( void  )

Definition at line 4941 of file reorderbuffer.c.

4942{
4944 struct dirent *logical_de;
4945
4948 {
4949 if (strcmp(logical_de->d_name, ".") == 0 ||
4950 strcmp(logical_de->d_name, "..") == 0)
4951 continue;
4952
4953 /* if it cannot be a slot, skip the directory */
4954 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4955 continue;
4956
4957 /*
4958 * ok, has to be a surviving logical slot, iterate and delete
4959 * everything starting with xid-*
4960 */
4962 }
4964}

References AllocateDir(), DEBUG2, fb(), FreeDir(), PG_REPLSLOT_DIR, ReadDir(), ReorderBufferCleanupSerializedTXNs(), and ReplicationSlotValidateName().

Referenced by StartupXLOG().
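
At startup the routine walks the replication-slot directory, skips the "." and ".." entries, treats every remaining name that validates as a slot as a surviving slot, and removes its leftover xid-* spill files. A standalone C sketch of that directory walk using POSIX opendir/readdir; startup_scan() and cleanup_slot_dir() are hypothetical stand-ins for the AllocateDir/ReadDir loop and ReorderBufferCleanupSerializedTXNs().

#include <dirent.h>
#include <stdio.h>
#include <string.h>

/* hypothetical per-slot cleanup hook; the real code calls
 * ReorderBufferCleanupSerializedTXNs() for each surviving slot */
static void
cleanup_slot_dir(const char *slotname)
{
	printf("would remove xid-* spill files under %s\n", slotname);
}

static int
startup_scan(const char *replslot_dir)
{
	DIR		   *dir = opendir(replslot_dir);
	struct dirent *de;

	if (dir == NULL)
		return -1;

	while ((de = readdir(dir)) != NULL)
	{
		/* skip the directory self/parent entries */
		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
			continue;

		/* a real implementation also validates the name as a slot name */
		cleanup_slot_dir(de->d_name);
	}
	closedir(dir);
	return 0;
}

int
main(void)
{
	return startup_scan(".");
}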

◆ TransactionIdInArray()

static bool TransactionIdInArray ( TransactionId  xid,
TransactionId *xip,
Size  num 
)
static

Definition at line 5456 of file reorderbuffer.c.

5457{
5458 return bsearch(&xid, xip, num,
5459 sizeof(TransactionId), xidComparator) != NULL;
5460}

References fb(), and xidComparator().

Referenced by UpdateLogicalMappings().
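
The membership test relies on the xid array being kept in plain numeric order, so a bsearch() with a simple numeric comparator suffices. A standalone, runnable C sketch of the same test; xid_cmp() stands in for xidComparator and the example subxip array is made up.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int TransactionId;

/* comparator standing in for xidComparator: plain numeric order of xids */
static int
xid_cmp(const void *a, const void *b)
{
	TransactionId xa = *(const TransactionId *) a;
	TransactionId xb = *(const TransactionId *) b;

	if (xa < xb)
		return -1;
	if (xa > xb)
		return 1;
	return 0;
}

/* membership test over a sorted array, as in TransactionIdInArray() */
static bool
xid_in_array(TransactionId xid, const TransactionId *xip, size_t num)
{
	return bsearch(&xid, xip, num, sizeof(TransactionId), xid_cmp) != NULL;
}

int
main(void)
{
	TransactionId subxip[] = {100, 105, 230, 231};

	printf("%d %d\n",
		   xid_in_array(105, subxip, 4),
		   xid_in_array(106, subxip, 4));
	return 0;
}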

◆ UpdateLogicalMappings()

static void UpdateLogicalMappings ( HTAB *tuplecid_data,
Oid  relid,
Snapshot  snapshot 
)
static

Definition at line 5479 of file reorderbuffer.c.

5480{
5482 struct dirent *mapping_de;
5483 List *files = NIL;
5484 ListCell *file;
5485 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5486
5489 {
5490 Oid f_dboid;
5491 Oid f_relid;
5495 uint32 f_hi,
5496 f_lo;
5498
5499 if (strcmp(mapping_de->d_name, ".") == 0 ||
5500 strcmp(mapping_de->d_name, "..") == 0)
5501 continue;
5502
5503 /* Ignore files that aren't ours */
5504 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5505 continue;
5506
5508 &f_dboid, &f_relid, &f_hi, &f_lo,
5509 &f_mapped_xid, &f_create_xid) != 6)
5510 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5511
5512 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5513
5514 /* mapping for another database */
5515 if (f_dboid != dboid)
5516 continue;
5517
5518 /* mapping for another relation */
5519 if (f_relid != relid)
5520 continue;
5521
5522 /* did the creating transaction abort? */
5524 continue;
5525
5526 /* not for our transaction */
5527 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5528 continue;
5529
5530 /* ok, relevant, queue for apply */
5532 f->lsn = f_lsn;
5533 strcpy(f->fname, mapping_de->d_name);
5534 files = lappend(files, f);
5535 }
5537
5538 /* sort files so we apply them in LSN order */
5540
5541 foreach(file, files)
5542 {
5544
5545 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5546 snapshot->subxip[0]);
5548 pfree(f);
5549 }
5550}

References AllocateDir(), ApplyLogicalMappingFile(), DEBUG1, elog, ERROR, fb(), file_sort_by_lsn(), RewriteMappingFile::fname, FreeDir(), InvalidOid, IsSharedRelation(), lappend(), lfirst, list_sort(), LOGICAL_REWRITE_FORMAT, RewriteMappingFile::lsn, MyDatabaseId, NIL, palloc_object, pfree(), PG_LOGICAL_MAPPINGS_DIR, ReadDir(), SnapshotData::subxcnt, SnapshotData::subxip, TransactionIdDidCommit(), TransactionIdInArray(), and tuplecid_data.

Referenced by ResolveCminCmaxDuringDecoding().
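
Each candidate file name encodes six fields (database OID, relation OID, the two 32-bit halves of the LSN, the mapped xid and the creating xid), which the loop above pulls out with a single sscanf() and then reassembles into a 64-bit LSN for sorting. A standalone C sketch of that parsing step; MAP_FORMAT and the sample file name below are hypothetical stand-ins mirroring the six-field sscanf() in the loop, not necessarily the exact LOGICAL_REWRITE_FORMAT string.

#include <inttypes.h>
#include <stdio.h>

/* hypothetical name layout mirroring the six-field sscanf() above */
#define MAP_FORMAT "map-%x-%x-%X_%X-%x-%x"

int
main(void)
{
	const char *fname = "map-4000-4a52-0_16B3DE8-2f1-2f0";
	unsigned	dboid,
				relid,
				hi,
				lo,
				mapped_xid,
				create_xid;

	if (sscanf(fname, MAP_FORMAT,
			   &dboid, &relid, &hi, &lo, &mapped_xid, &create_xid) != 6)
	{
		fprintf(stderr, "could not parse filename \"%s\"\n", fname);
		return 1;
	}

	/* rebuild the 64-bit LSN from its two 32-bit halves, as the loop does */
	printf("db %u rel %u lsn %" PRIX64 " mapped xid %u created by xid %u\n",
		   dboid, relid, ((uint64_t) hi) << 32 | lo, mapped_xid, create_xid);
	return 0;
}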

Variable Documentation

◆ debug_logical_replication_streaming

◆ logical_decoding_work_mem

int logical_decoding_work_mem

Definition at line 225 of file reorderbuffer.c.

Referenced by ReorderBufferCheckMemoryLimit().

◆ max_changes_in_memory

const Size max_changes_in_memory = 4096
static

Definition at line 226 of file reorderbuffer.c.

Referenced by ReorderBufferRestoreChanges().