PostgreSQL Source Code git master
Loading...
Searching...
No Matches
reorderbuffer.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <sys/stat.h>
#include "access/detoast.h"
#include "access/heapam.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "common/int.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"
Include dependency graph for reorderbuffer.c:

Go to the source code of this file.

Data Structures

struct  ReorderBufferTXNByIdEnt
 
struct  ReorderBufferTupleCidKey
 
struct  ReorderBufferTupleCidEnt
 
struct  TXNEntryFile
 
struct  ReorderBufferIterTXNEntry
 
struct  ReorderBufferIterTXNState
 
struct  ReorderBufferToastEnt
 
struct  ReorderBufferDiskChange
 
struct  RewriteMappingFile
 

Macros

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
 
#define IsSpecInsert(action)
 
#define IsSpecConfirmOrAbort(action)
 
#define IsInsertOrUpdate(action)
 
#define CHANGES_THRESHOLD   100
 

Typedefs

typedef struct ReorderBufferTXNByIdEnt ReorderBufferTXNByIdEnt
 
typedef struct ReorderBufferTupleCidKey ReorderBufferTupleCidKey
 
typedef struct ReorderBufferTupleCidEnt ReorderBufferTupleCidEnt
 
typedef struct TXNEntryFile TXNEntryFile
 
typedef struct ReorderBufferIterTXNEntry ReorderBufferIterTXNEntry
 
typedef struct ReorderBufferIterTXNState ReorderBufferIterTXNState
 
typedef struct ReorderBufferToastEnt ReorderBufferToastEnt
 
typedef struct ReorderBufferDiskChange ReorderBufferDiskChange
 
typedef struct RewriteMappingFile RewriteMappingFile
 

Functions

static ReorderBufferTXN * ReorderBufferAllocTXN (ReorderBuffer *rb)
 
static void ReorderBufferFreeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static ReorderBufferTXN * ReorderBufferTXNByXid (ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
 
static void ReorderBufferTransferSnapToParent (ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
 
static void AssertTXNLsnOrder (ReorderBuffer *rb)
 
static void ReorderBufferIterTXNInit (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
 
static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferIterTXNFinish (ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 
static void ReorderBufferExecuteInvalidations (uint32 nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferCheckMemoryLimit (ReorderBuffer *rb)
 
static void ReorderBufferSerializeTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferSerializeChange (ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
 
static Size ReorderBufferRestoreChanges (ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
 
static void ReorderBufferRestoreChange (ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
 
static void ReorderBufferRestoreCleanup (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferTruncateTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
 
static void ReorderBufferMaybeMarkTXNStreamed (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static bool ReorderBufferCheckAndTruncateAbortedTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferCleanupSerializedTXNs (const char *slotname)
 
static void ReorderBufferSerializedPath (char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
 
static int ReorderBufferTXNSizeCompare (const pairingheap_node *a, const pairingheap_node *b, void *arg)
 
static void ReorderBufferFreeSnap (ReorderBuffer *rb, Snapshot snap)
 
static Snapshot ReorderBufferCopySnap (ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
 
static bool ReorderBufferCanStream (ReorderBuffer *rb)
 
static bool ReorderBufferCanStartStreaming (ReorderBuffer *rb)
 
static void ReorderBufferStreamTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferStreamCommit (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastInitHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReset (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferToastReplace (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static void ReorderBufferToastAppendChunk (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
 
static Size ReorderBufferChangeSize (ReorderBufferChange *change)
 
static void ReorderBufferChangeMemoryUpdate (ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
 
ReorderBuffer * ReorderBufferAllocate (void)
 
void ReorderBufferFree (ReorderBuffer *rb)
 
ReorderBufferChange * ReorderBufferAllocChange (ReorderBuffer *rb)
 
void ReorderBufferFreeChange (ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
 
HeapTuple ReorderBufferAllocTupleBuf (ReorderBuffer *rb, Size tuple_len)
 
void ReorderBufferFreeTupleBuf (HeapTuple tuple)
 
Oid * ReorderBufferAllocRelids (ReorderBuffer *rb, int nrelids)
 
void ReorderBufferFreeRelids (ReorderBuffer *rb, Oid *relids)
 
static void ReorderBufferProcessPartialChange (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueChange (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
 
void ReorderBufferQueueMessage (ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
 
static void AssertChangeLsnOrder (ReorderBufferTXN *txn)
 
ReorderBufferTXN * ReorderBufferGetOldestTXN (ReorderBuffer *rb)
 
TransactionId ReorderBufferGetOldestXmin (ReorderBuffer *rb)
 
void ReorderBufferSetRestartPoint (ReorderBuffer *rb, XLogRecPtr ptr)
 
void ReorderBufferAssignChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
 
void ReorderBufferCommitChild (ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
 
static int ReorderBufferIterCompare (Datum a, Datum b, void *arg)
 
static void ReorderBufferCleanupTXN (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void ReorderBufferBuildTupleCidHash (ReorderBuffer *rb, ReorderBufferTXN *txn)
 
static void SetupCheckXidLive (TransactionId xid)
 
static void ReorderBufferApplyChange (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyTruncate (ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferApplyMessage (ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
 
static void ReorderBufferSaveTXNSnapshot (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
 
static void ReorderBufferResetTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
 
static void ReorderBufferProcessTXN (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
 
static void ReorderBufferReplay (ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferCommit (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
bool ReorderBufferRememberPrepareInfo (ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
 
void ReorderBufferSkipPrepare (ReorderBuffer *rb, TransactionId xid)
 
void ReorderBufferPrepare (ReorderBuffer *rb, TransactionId xid, char *gid)
 
void ReorderBufferFinishPrepared (ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
 
void ReorderBufferAbort (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
 
void ReorderBufferAbortOld (ReorderBuffer *rb, TransactionId oldestRunningXid)
 
void ReorderBufferForget (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferInvalidate (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferImmediateInvalidation (ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
 
void ReorderBufferProcessXid (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
void ReorderBufferAddSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferSetBaseSnapshot (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
 
void ReorderBufferAddNewCommandId (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
 
void ReorderBufferAddNewTupleCids (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
 
static void ReorderBufferQueueInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
static void ReorderBufferAccumulateInvalidations (SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
 
void ReorderBufferAddInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferAddDistributedInvalidations (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
 
void ReorderBufferXidSetCatalogChanges (ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 
TransactionId * ReorderBufferGetCatalogChangesXacts (ReorderBuffer *rb)
 
bool ReorderBufferXidHasCatalogChanges (ReorderBuffer *rb, TransactionId xid)
 
bool ReorderBufferXidHasBaseSnapshot (ReorderBuffer *rb, TransactionId xid)
 
static void ReorderBufferSerializeReserve (ReorderBuffer *rb, Size sz)
 
static ReorderBufferTXN * ReorderBufferLargestTXN (ReorderBuffer *rb)
 
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN (ReorderBuffer *rb)
 
void StartupReorderBuffer (void)
 
static void ApplyLogicalMappingFile (HTAB *tuplecid_data, const char *fname)
 
static bool TransactionIdInArray (TransactionId xid, TransactionId *xip, Size num)
 
static int file_sort_by_lsn (const ListCell *a_p, const ListCell *b_p)
 
static void UpdateLogicalMappings (HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
 
bool ResolveCminCmaxDuringDecoding (HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
uint32 ReorderBufferGetInvalidations (ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
 

Variables

int logical_decoding_work_mem
 
static const Size max_changes_in_memory = 4096
 
int debug_logical_replication_streaming = DEBUG_LOGICAL_REP_STREAMING_BUFFERED
 

Macro Definition Documentation

◆ CHANGES_THRESHOLD

#define CHANGES_THRESHOLD   100

◆ IsInsertOrUpdate

#define IsInsertOrUpdate (   action)
Value:
( \
(((action) == REORDER_BUFFER_CHANGE_INSERT) || \
((action) == REORDER_BUFFER_CHANGE_UPDATE) || \
((action) == REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT)) \
)
@ REORDER_BUFFER_CHANGE_INSERT
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT
@ REORDER_BUFFER_CHANGE_UPDATE

Definition at line 206 of file reorderbuffer.c.

324{
325 ReorderBuffer *buffer;
328
330
331 /* allocate memory in own context, to have better accountability */
333 "ReorderBuffer",
335
336 buffer =
338
339 memset(&hash_ctl, 0, sizeof(hash_ctl));
340
341 buffer->context = new_ctx;
342
344 "Change",
346 sizeof(ReorderBufferChange));
347
349 "TXN",
351 sizeof(ReorderBufferTXN));
352
353 /*
354 * To minimize memory fragmentation caused by long-running transactions
355 * with changes spanning multiple memory blocks, we use a single
356 * fixed-size memory block for decoded tuple storage. The performance
357 * testing showed that the default memory block size maintains logical
358 * decoding performance without causing fragmentation due to concurrent
359 * transactions. One might think that we can use the max size as
360 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
361 * the memory fragmentation.
362 */
364 "Tuples",
368
369 hash_ctl.keysize = sizeof(TransactionId);
370 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
371 hash_ctl.hcxt = buffer->context;
372
373 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
375
377 buffer->by_txn_last_txn = NULL;
378
379 buffer->outbuf = NULL;
380 buffer->outbufsize = 0;
381 buffer->size = 0;
382
383 /* txn_heap is ordered by transaction size */
385
386 buffer->spillTxns = 0;
387 buffer->spillCount = 0;
388 buffer->spillBytes = 0;
389 buffer->streamTxns = 0;
390 buffer->streamCount = 0;
391 buffer->streamBytes = 0;
392 buffer->memExceededCount = 0;
393 buffer->totalTxns = 0;
394 buffer->totalBytes = 0;
395
397
398 dlist_init(&buffer->toplevel_by_lsn);
400 dclist_init(&buffer->catchange_txns);
401
402 /*
403 * Ensure there's no stale data from prior uses of this slot, in case some
404 * prior exit avoided calling ReorderBufferFree. Failure to do this can
405 * produce duplicated txns, and it's very cheap if there's nothing there.
406 */
408
409 return buffer;
410}
411
412/*
413 * Free a ReorderBuffer
414 */
415void
417{
418 MemoryContext context = rb->context;
419
420 /*
421 * We free separately allocated data by entirely scrapping reorderbuffer's
422 * memory context.
423 */
424 MemoryContextDelete(context);
425
426 /* Free disk space used by unconsumed reorder buffers */
428}
429
430/*
431 * Allocate a new ReorderBufferTXN.
432 */
433static ReorderBufferTXN *
435{
436 ReorderBufferTXN *txn;
437
438 txn = (ReorderBufferTXN *)
439 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
440
441 memset(txn, 0, sizeof(ReorderBufferTXN));
442
443 dlist_init(&txn->changes);
444 dlist_init(&txn->tuplecids);
445 dlist_init(&txn->subtxns);
446
447 /* InvalidCommandId is not zero, so set it explicitly */
450
451 return txn;
452}
453
454/*
455 * Free a ReorderBufferTXN.
456 */
457static void
459{
460 /* clean the lookup cache if we were cached (quite likely) */
461 if (rb->by_txn_last_xid == txn->xid)
462 {
463 rb->by_txn_last_xid = InvalidTransactionId;
464 rb->by_txn_last_txn = NULL;
465 }
466
467 /* free data that's contained */
468
469 if (txn->gid != NULL)
470 {
471 pfree(txn->gid);
472 txn->gid = NULL;
473 }
474
475 if (txn->tuplecid_hash != NULL)
476 {
478 txn->tuplecid_hash = NULL;
479 }
480
481 if (txn->invalidations)
482 {
483 pfree(txn->invalidations);
484 txn->invalidations = NULL;
485 }
486
488 {
491 }
492
493 /* Reset the toast hash */
495
496 /* All changes must be deallocated */
497 Assert(txn->size == 0);
498
499 pfree(txn);
500}
501
502/*
503 * Allocate a ReorderBufferChange.
504 */
507{
508 ReorderBufferChange *change;
509
510 change = (ReorderBufferChange *)
511 MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange));
512
513 memset(change, 0, sizeof(ReorderBufferChange));
514 return change;
515}
516
517/*
518 * Free a ReorderBufferChange and update memory accounting, if requested.
519 */
520void
522 bool upd_mem)
523{
524 /* update memory accounting info */
525 if (upd_mem)
528
529 /* free contained data */
530 switch (change->action)
531 {
536 if (change->data.tp.newtuple)
537 {
539 change->data.tp.newtuple = NULL;
540 }
541
542 if (change->data.tp.oldtuple)
543 {
545 change->data.tp.oldtuple = NULL;
546 }
547 break;
549 if (change->data.msg.prefix != NULL)
550 pfree(change->data.msg.prefix);
551 change->data.msg.prefix = NULL;
552 if (change->data.msg.message != NULL)
553 pfree(change->data.msg.message);
554 change->data.msg.message = NULL;
555 break;
557 if (change->data.inval.invalidations)
558 pfree(change->data.inval.invalidations);
559 change->data.inval.invalidations = NULL;
560 break;
562 if (change->data.snapshot)
563 {
565 change->data.snapshot = NULL;
566 }
567 break;
568 /* no data in addition to the struct itself */
570 if (change->data.truncate.relids != NULL)
571 {
573 change->data.truncate.relids = NULL;
574 }
575 break;
580 break;
581 }
582
583 pfree(change);
584}
585
586/*
587 * Allocate a HeapTuple fitting a tuple of size tuple_len (excluding header
588 * overhead).
589 */
592{
593 HeapTuple tuple;
595
596 alloc_len = tuple_len + SizeofHeapTupleHeader;
597
598 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
600 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
601
602 return tuple;
603}
604
605/*
606 * Free a HeapTuple returned by ReorderBufferAllocTupleBuf().
607 */
608void
610{
611 pfree(tuple);
612}
613
614/*
615 * Allocate an array for relids of truncated relations.
616 *
617 * We use the global memory context (for the whole reorder buffer), because
618 * none of the existing ones seems like a good match (some are SLAB, so we
619 * can't use those, and tup_context is meant for tuple data, not relids). We
620 * could add yet another context, but that seems like overkill - TRUNCATE is
621 * not a particularly common operation, so it does not seem worth it.
622 */
623Oid *
625{
626 Oid *relids;
628
629 alloc_len = sizeof(Oid) * nrelids;
630
631 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
632
633 return relids;
634}
635
636/*
637 * Free an array of relids.
638 */
639void
641{
642 pfree(relids);
643}
644
645/*
646 * Return the ReorderBufferTXN from the given buffer, specified by Xid.
647 * If create is true, and a transaction doesn't already exist, create it
648 * (with the given LSN, and as top transaction if that's specified);
649 * when this happens, is_new is set to true.
650 */
651static ReorderBufferTXN *
653 bool *is_new, XLogRecPtr lsn, bool create_as_top)
654{
655 ReorderBufferTXN *txn;
657 bool found;
658
660
661 /*
662 * Check the one-entry lookup cache first
663 */
664 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
665 rb->by_txn_last_xid == xid)
666 {
667 txn = rb->by_txn_last_txn;
668
669 if (txn != NULL)
670 {
671 /* found it, and it's valid */
672 if (is_new)
673 *is_new = false;
674 return txn;
675 }
676
677 /*
678 * cached as non-existent, and asked not to create? Then nothing else
679 * to do.
680 */
681 if (!create)
682 return NULL;
683 /* otherwise fall through to create it */
684 }
685
686 /*
687 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
688 * create an entry.
689 */
690
691 /* search the lookup table */
693 hash_search(rb->by_txn,
694 &xid,
695 create ? HASH_ENTER : HASH_FIND,
696 &found);
697 if (found)
698 txn = ent->txn;
699 else if (create)
700 {
701 /* initialize the new entry, if creation was requested */
702 Assert(ent != NULL);
704
706 ent->txn->xid = xid;
707 txn = ent->txn;
708 txn->first_lsn = lsn;
709 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
710
711 if (create_as_top)
712 {
713 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
715 }
716 }
717 else
718 txn = NULL; /* not found and not asked to create */
719
720 /* update cache */
721 rb->by_txn_last_xid = xid;
722 rb->by_txn_last_txn = txn;
723
724 if (is_new)
725 *is_new = !found;
726
727 Assert(!create || txn != NULL);
728 return txn;
729}
730
731/*
732 * Record the partial change for the streaming of in-progress transactions. We
733 * can stream only complete changes so if we have a partial change like toast
734 * table insert or speculative insert then we mark such a 'txn' so that it
735 * can't be streamed. We also ensure that if the changes in such a 'txn' can
736 * be streamed and are above logical_decoding_work_mem threshold then we stream
737 * them as soon as we have a complete change.
738 */
739static void
741 ReorderBufferChange *change,
742 bool toast_insert)
743{
744 ReorderBufferTXN *toptxn;
745
746 /*
747 * The partial changes need to be processed only while streaming
748 * in-progress transactions.
749 */
751 return;
752
753 /* Get the top transaction. */
754 toptxn = rbtxn_get_toptxn(txn);
755
756 /*
757 * Indicate a partial change for toast inserts. The change will be
758 * considered as complete once we get the insert or update on the main
759 * table and we are sure that the pending toast chunks are not required
760 * anymore.
761 *
762 * If we allow streaming when there are pending toast chunks then such
763 * chunks won't be released till the insert (multi_insert) is complete and
764 * we expect the txn to have streamed all changes after streaming. This
765 * restriction is mainly to ensure the correctness of streamed
766 * transactions and it doesn't seem worth uplifting such a restriction
767 * just to allow this case because anyway we will stream the transaction
768 * once such an insert is complete.
769 */
770 if (toast_insert)
772 else if (rbtxn_has_partial_change(toptxn) &&
773 IsInsertOrUpdate(change->action) &&
776
777 /*
778 * Indicate a partial change for speculative inserts. The change will be
779 * considered as complete once we get the speculative confirm or abort
780 * token.
781 */
782 if (IsSpecInsert(change->action))
784 else if (rbtxn_has_partial_change(toptxn) &&
787
788 /*
789 * Stream the transaction if it is serialized before and the changes are
790 * now complete in the top-level transaction.
791 *
792 * The reason for doing the streaming of such a transaction as soon as we
793 * get the complete change for it is that previously it would have reached
794 * the memory threshold and wouldn't get streamed because of incomplete
795 * changes. Delaying such transactions would increase apply lag for them.
796 */
798 !(rbtxn_has_partial_change(toptxn)) &&
799 rbtxn_is_serialized(txn) &&
801 ReorderBufferStreamTXN(rb, toptxn);
802}
803
804/*
805 * Queue a change into a transaction so it can be replayed upon commit or will be
806 * streamed when we reach logical_decoding_work_mem threshold.
807 */
808void
810 ReorderBufferChange *change, bool toast_insert)
811{
812 ReorderBufferTXN *txn;
813
814 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
815
816 /*
817 * If we have detected that the transaction is aborted while streaming the
818 * previous changes or by checking its CLOG, there is no point in
819 * collecting further changes for it.
820 */
821 if (rbtxn_is_aborted(txn))
822 {
823 /*
824 * We don't need to update memory accounting for this change as we
825 * have not added it to the queue yet.
826 */
827 ReorderBufferFreeChange(rb, change, false);
828 return;
829 }
830
831 /*
832 * The changes that are sent downstream are considered streamable. We
833 * remember such transactions so that only those will later be considered
834 * for streaming.
835 */
836 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
842 {
843 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
844
846 }
847
848 change->lsn = lsn;
849 change->txn = txn;
850
852 dlist_push_tail(&txn->changes, &change->node);
853 txn->nentries++;
854 txn->nentries_mem++;
855
856 /* update memory accounting information */
859
860 /* process partial change */
862
863 /* check the memory limits and evict something if needed */
865}
866
867/*
868 * A transactional message is queued to be processed upon commit and a
869 * non-transactional message gets processed immediately.
870 */
871void
874 bool transactional, const char *prefix,
875 Size message_size, const char *message)
876{
877 if (transactional)
878 {
879 MemoryContext oldcontext;
880 ReorderBufferChange *change;
881
883
884 /*
885 * We don't expect snapshots for transactional changes - we'll use the
886 * snapshot derived later during apply (unless the change gets
887 * skipped).
888 */
889 Assert(!snap);
890
891 oldcontext = MemoryContextSwitchTo(rb->context);
892
895 change->data.msg.prefix = pstrdup(prefix);
896 change->data.msg.message_size = message_size;
897 change->data.msg.message = palloc(message_size);
898 memcpy(change->data.msg.message, message, message_size);
899
900 ReorderBufferQueueChange(rb, xid, lsn, change, false);
901
902 MemoryContextSwitchTo(oldcontext);
903 }
904 else
905 {
906 ReorderBufferTXN *txn = NULL;
907 volatile Snapshot snapshot_now = snap;
908
909 /* Non-transactional changes require a valid snapshot. */
910 Assert(snapshot_now);
911
912 if (xid != InvalidTransactionId)
913 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
914
915 /* setup snapshot to allow catalog access */
916 SetupHistoricSnapshot(snapshot_now, NULL);
917 PG_TRY();
918 {
919 rb->message(rb, txn, lsn, false, prefix, message_size, message);
920
922 }
923 PG_CATCH();
924 {
926 PG_RE_THROW();
927 }
928 PG_END_TRY();
929 }
930}
931
932/*
933 * AssertTXNLsnOrder
934 * Verify LSN ordering of transaction lists in the reorderbuffer
935 *
936 * Other LSN-related invariants are checked too.
937 *
938 * No-op if assertions are not in use.
939 */
940static void
942{
943#ifdef USE_ASSERT_CHECKING
944 LogicalDecodingContext *ctx = rb->private_data;
945 dlist_iter iter;
948
949 /*
950 * Skip the verification if we haven't yet reached the LSN at which we start
951 * decoding the contents of transactions, because until we reach that
952 * LSN, we could have transactions that don't have the association between
953 * the top-level transaction and subtransaction yet and consequently have
954 * the same LSN. We don't guarantee this association until we try to
955 * decode the actual contents of transaction. The ordering of the records
956 * prior to the start_decoding_at LSN should have been checked before the
957 * restart.
958 */
960 return;
961
962 dlist_foreach(iter, &rb->toplevel_by_lsn)
963 {
965 iter.cur);
966
967 /* start LSN must be set */
968 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
969
970 /* If there is an end LSN, it must not be lower than the start LSN */
971 if (XLogRecPtrIsValid(cur_txn->end_lsn))
972 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
973
974 /* Current initial LSN must be strictly higher than previous */
977
978 /* known-as-subtxn txns must not be listed */
980
981 prev_first_lsn = cur_txn->first_lsn;
982 }
983
984 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
985 {
987 base_snapshot_node,
988 iter.cur);
989
990 /* base snapshot (and its LSN) must be set */
991 Assert(cur_txn->base_snapshot != NULL);
992 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
993
994 /* current LSN must be strictly higher than previous */
996 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
997
998 /* known-as-subtxn txns must not be listed */
1000
1001 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1002 }
1003#endif
1004}
1005
1006/*
1007 * AssertChangeLsnOrder
1008 *
1009 * Check ordering of changes in the (sub)transaction.
1010 */
1011static void
1013{
1014#ifdef USE_ASSERT_CHECKING
1015 dlist_iter iter;
1017
1018 dlist_foreach(iter, &txn->changes)
1019 {
1021
1023
1026 Assert(txn->first_lsn <= cur_change->lsn);
1027
1028 if (XLogRecPtrIsValid(txn->end_lsn))
1029 Assert(cur_change->lsn <= txn->end_lsn);
1030
1032
1033 prev_lsn = cur_change->lsn;
1034 }
1035#endif
1036}
1037
1038/*
1039 * ReorderBufferGetOldestTXN
1040 * Return oldest transaction in reorderbuffer
1041 */
1044{
1045 ReorderBufferTXN *txn;
1046
1048
1049 if (dlist_is_empty(&rb->toplevel_by_lsn))
1050 return NULL;
1051
1052 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1053
1056 return txn;
1057}
1058
1059/*
1060 * ReorderBufferGetOldestXmin
1061 * Return oldest Xmin in reorderbuffer
1062 *
1063 * Returns oldest possibly running Xid from the point of view of snapshots
1064 * used in the transactions kept by reorderbuffer, or InvalidTransactionId if
1065 * there are none.
1066 *
1067 * Since snapshots are assigned monotonically, this equals the Xmin of the
1068 * base snapshot with minimal base_snapshot_lsn.
1069 */
1072{
1073 ReorderBufferTXN *txn;
1074
1076
1077 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1078 return InvalidTransactionId;
1079
1080 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1081 &rb->txns_by_base_snapshot_lsn);
1082 return txn->base_snapshot->xmin;
1083}
1084
1085void
1087{
1088 rb->current_restart_decoding_lsn = ptr;
1089}
1090
1091/*
1092 * ReorderBufferAssignChild
1093 *
1094 * Make note that we know that subxid is a subtransaction of xid, seen as of
1095 * the given lsn.
1096 */
1097void
1099 TransactionId subxid, XLogRecPtr lsn)
1100{
1101 ReorderBufferTXN *txn;
1103 bool new_top;
1104 bool new_sub;
1105
1106 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1107 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1108
1109 if (!new_sub)
1110 {
1112 {
1113 /* already associated, nothing to do */
1114 return;
1115 }
1116 else
1117 {
1118 /*
1119 * We already saw this transaction, but initially added it to the
1120 * list of top-level txns. Now that we know it's not top-level,
1121 * remove it from there.
1122 */
1123 dlist_delete(&subtxn->node);
1124 }
1125 }
1126
1127 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1128 subtxn->toplevel_xid = xid;
1129 Assert(subtxn->nsubtxns == 0);
1130
1131 /* set the reference to top-level transaction */
1132 subtxn->toptxn = txn;
1133
1134 /* add to subtransaction list */
1135 dlist_push_tail(&txn->subtxns, &subtxn->node);
1136 txn->nsubtxns++;
1137
1138 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1140
1141 /* Verify LSN-ordering invariant */
1143}
1144
1145/*
1146 * ReorderBufferTransferSnapToParent
1147 * Transfer base snapshot from subtxn to top-level txn, if needed
1148 *
1149 * This is done if the top-level txn doesn't have a base snapshot, or if the
1150 * subtxn's base snapshot has an earlier LSN than the top-level txn's base
1151 * snapshot's LSN. This can happen if there are no changes in the toplevel
1152 * txn but there are some in the subtxn, or the first change in subtxn has
1153 * earlier LSN than first change in the top-level txn and we learned about
1154 * their kinship only now.
1155 *
1156 * The subtransaction's snapshot is cleared regardless of the transfer
1157 * happening, since it's not needed anymore in either case.
1158 *
1159 * We do this as soon as we become aware of their kinship, to avoid queueing
1160 * extra snapshots to txns known-as-subtxns -- only top-level txns will
1161 * receive further snapshots.
1162 */
1163static void
1166{
1167 Assert(subtxn->toplevel_xid == txn->xid);
1168
1169 if (subtxn->base_snapshot != NULL)
1170 {
1171 if (txn->base_snapshot == NULL ||
1172 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1173 {
1174 /*
1175 * If the toplevel transaction already has a base snapshot but
1176 * it's newer than the subxact's, purge it.
1177 */
1178 if (txn->base_snapshot != NULL)
1179 {
1182 }
1183
1184 /*
1185 * The snapshot is now the top transaction's; transfer it, and
1186 * adjust the list position of the top transaction in the list by
1187 * moving it to where the subtransaction is.
1188 */
1189 txn->base_snapshot = subtxn->base_snapshot;
1190 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1191 dlist_insert_before(&subtxn->base_snapshot_node,
1192 &txn->base_snapshot_node);
1193
1194 /*
1195 * The subtransaction doesn't have a snapshot anymore (so it
1196 * mustn't be in the list.)
1197 */
1198 subtxn->base_snapshot = NULL;
1199 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1200 dlist_delete(&subtxn->base_snapshot_node);
1201 }
1202 else
1203 {
1204 /* Base snap of toplevel is fine, so subxact's is not needed */
1205 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1206 dlist_delete(&subtxn->base_snapshot_node);
1207 subtxn->base_snapshot = NULL;
1208 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1209 }
1210 }
1211}
1212
1213/*
1214 * Associate a subtransaction with its toplevel transaction at commit
1215 * time. There may be no further changes added after this.
1216 */
1217void
1219 TransactionId subxid, XLogRecPtr commit_lsn,
1220 XLogRecPtr end_lsn)
1221{
1223
1224 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1225 InvalidXLogRecPtr, false);
1226
1227 /*
1228 * No need to do anything if that subtxn didn't contain any changes
1229 */
1230 if (!subtxn)
1231 return;
1232
1233 subtxn->final_lsn = commit_lsn;
1234 subtxn->end_lsn = end_lsn;
1235
1236 /*
1237 * Assign this subxact as a child of the toplevel xact (no-op if already
1238 * done.)
1239 */
1241}
1242
1243
1244/*
1245 * Support for efficiently iterating over a transaction's and its
1246 * subtransactions' changes.
1247 *
1248 * We do this by doing a k-way merge between transactions/subtransactions. For that
1249 * we model the current heads of the different transactions as a binary heap
1250 * so we easily know which (sub-)transaction has the change with the smallest
1251 * lsn next.
1252 *
1253 * We assume the changes in individual transactions are already sorted by LSN.
1254 */
1255
1256/*
1257 * Binary heap comparison function.
 *
 * a and b are Datums carrying int32 indexes into state->entries; the two
 * entries are ordered by the LSN of their current change. The result is
 * deliberately reversed: 1 when a's LSN is lower than b's, 0 when they are
 * equal, and -1 when a's LSN is higher, so the entry with the lowest LSN
 * ranks highest.
 *
 * NOTE(review): this presumably relies on the binary heap surfacing the
 * highest-ranked element first, which would make binaryheap_first() return
 * the entry with the smallest LSN -- confirm against lib/binaryheap.
1258 */
1259static int
1261{
1263 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1264 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1265
1266 if (pos_a < pos_b)
1267 return 1;
1268 else if (pos_a == pos_b)
1269 return 0;
1270 return -1;
1271}
1272
1273/*
1274 * Allocate & initialize an iterator which iterates in lsn order over a
1275 * transaction and all its subtransactions.
1276 *
1277 * Note: The iterator state is returned through iter_state parameter rather
1278 * than the function's return value. This is because the state gets cleaned up
1279 * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1280 * back the state even if this function throws an exception.
1281 */
1282static void
1285{
1286 Size nr_txns = 0;
1289 int32 off;
1290
1291 *iter_state = NULL;
1292
1293 /* Check ordering of changes in the toplevel transaction. */
1295
1296 /*
1297 * Calculate the size of our heap: one element for every transaction that
1298 * contains changes. (Besides the transactions already in the reorder
1299 * buffer, we count the one we were directly passed.)
1300 */
1301 if (txn->nentries > 0)
1302 nr_txns++;
1303
1305 {
1307
1309
1310 /* Check ordering of changes in this subtransaction. */
1312
1313 if (cur_txn->nentries > 0)
1314 nr_txns++;
1315 }
1316
1317 /* allocate iteration state */
1319 MemoryContextAllocZero(rb->context,
1321 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1322
1323 state->nr_txns = nr_txns;
1324 dlist_init(&state->old_change);
1325
1326 for (off = 0; off < state->nr_txns; off++)
1327 {
1328 state->entries[off].file.vfd = -1;
1329 state->entries[off].segno = 0;
1330 }
1331
1332 /* allocate heap */
1333 state->heap = binaryheap_allocate(state->nr_txns,
1335 state);
1336
1337 /* Now that the state fields are initialized, it is safe to return it. */
1338 *iter_state = state;
1339
1340 /*
1341 * Now insert items into the binary heap, in an unordered fashion. (We
1342 * will run a heap assembly step at the end; this is more efficient.)
1343 */
1344
1345 off = 0;
1346
1347 /* add toplevel transaction if it contains changes */
1348 if (txn->nentries > 0)
1349 {
1351
1352 if (rbtxn_is_serialized(txn))
1353 {
1354 /* serialize remaining changes */
1356 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1357 &state->entries[off].segno);
1358 }
1359
1361 &txn->changes);
1362
1363 state->entries[off].lsn = cur_change->lsn;
1364 state->entries[off].change = cur_change;
1365 state->entries[off].txn = txn;
1366
1368 }
1369
1370 /* add subtransactions if they contain changes */
1372 {
1374
1376
1377 if (cur_txn->nentries > 0)
1378 {
1380
1382 {
1383 /* serialize remaining changes */
1386 &state->entries[off].file,
1387 &state->entries[off].segno);
1388 }
1390 &cur_txn->changes);
1391
1392 state->entries[off].lsn = cur_change->lsn;
1393 state->entries[off].change = cur_change;
1394 state->entries[off].txn = cur_txn;
1395
1397 }
1398 }
1399
1400 /* assemble a valid binary heap */
1401 binaryheap_build(state->heap);
1402}
1403
1404/*
1405 * Return the next change when iterating over a transaction and its
1406 * subtransactions.
1407 *
1408 * Returns NULL when no further changes exist.
1409 */
1410static ReorderBufferChange *
1412{
1413 ReorderBufferChange *change;
1415 int32 off;
1416
1417 /* nothing there anymore */
1418 if (binaryheap_empty(state->heap))
1419 return NULL;
1420
1421 off = DatumGetInt32(binaryheap_first(state->heap));
1422 entry = &state->entries[off];
1423
1424 /* free memory we might have "leaked" in the previous *Next call */
1425 if (!dlist_is_empty(&state->old_change))
1426 {
1427 change = dlist_container(ReorderBufferChange, node,
1428 dlist_pop_head_node(&state->old_change));
1429 ReorderBufferFreeChange(rb, change, true);
1430 Assert(dlist_is_empty(&state->old_change));
1431 }
1432
1433 change = entry->change;
1434
1435 /*
1436 * update heap with information about which transaction has the next
1437 * relevant change in LSN order
1438 */
1439
1440 /* there are in-memory changes */
1441 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1442 {
1443 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1446
1447 /* txn stays the same */
1448 state->entries[off].lsn = next_change->lsn;
1449 state->entries[off].change = next_change;
1450
1452 return change;
1453 }
1454
1455 /* try to load changes from disk */
1456 if (entry->txn->nentries != entry->txn->nentries_mem)
1457 {
1458 /*
1459 * Ugly: restoring changes will reuse *Change records, thus delete the
1460 * current one from the per-tx list and only free in the next call.
1461 */
1462 dlist_delete(&change->node);
1463 dlist_push_tail(&state->old_change, &change->node);
1464
1465 /*
1466 * Update the total bytes processed by the txn for which we are
1467 * releasing the current set of changes and restoring the new set of
1468 * changes.
1469 */
1470 rb->totalBytes += entry->txn->size;
1471 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1472 &state->entries[off].segno))
1473 {
1474 /* successfully restored changes from disk */
1477 &entry->txn->changes);
1478
1479 elog(DEBUG2, "restored %u/%u changes from disk",
1480 (uint32) entry->txn->nentries_mem,
1481 (uint32) entry->txn->nentries);
1482
1483 Assert(entry->txn->nentries_mem);
1484 /* txn stays the same */
1485 state->entries[off].lsn = next_change->lsn;
1486 state->entries[off].change = next_change;
1488
1489 return change;
1490 }
1491 }
1492
1493 /* ok, no changes there anymore, remove */
1495
1496 return change;
1497}
1498
1499/*
1500 * Deallocate the iterator
1501 */
1502static void
1505{
1506 int32 off;
1507
1508 for (off = 0; off < state->nr_txns; off++)
1509 {
1510 if (state->entries[off].file.vfd != -1)
1511 FileClose(state->entries[off].file.vfd);
1512 }
1513
1514 /* free memory we might have "leaked" in the last *Next call */
1515 if (!dlist_is_empty(&state->old_change))
1516 {
1517 ReorderBufferChange *change;
1518
1519 change = dlist_container(ReorderBufferChange, node,
1520 dlist_pop_head_node(&state->old_change));
1521 ReorderBufferFreeChange(rb, change, true);
1522 Assert(dlist_is_empty(&state->old_change));
1523 }
1524
1525 binaryheap_free(state->heap);
1526 pfree(state);
1527}
1528
1529/*
1530 * Cleanup the contents of a transaction, usually after the transaction
1531 * committed or aborted.
1532 */
1533static void
1535{
1536 bool found;
1537 dlist_mutable_iter iter;
1538 Size mem_freed = 0;
1539
1540 /* cleanup subtransactions & their changes */
1541 dlist_foreach_modify(iter, &txn->subtxns)
1542 {
1544
1546
1547 /*
1548 * Subtransactions are always associated to the toplevel TXN, even if
1549 * they originally were happening inside another subtxn, so we won't
1550 * ever recurse more than one level deep here.
1551 */
1553 Assert(subtxn->nsubtxns == 0);
1554
1556 }
1557
1558 /* cleanup changes in the txn */
1559 dlist_foreach_modify(iter, &txn->changes)
1560 {
1561 ReorderBufferChange *change;
1562
1563 change = dlist_container(ReorderBufferChange, node, iter.cur);
1564
1565 /* Check we're not mixing changes from different transactions. */
1566 Assert(change->txn == txn);
1567
1568 /*
1569 * Instead of updating the memory counter for individual changes, we
1570 * sum up the size of memory to free so we can update the memory
1571 * counter all together below. This saves costs of maintaining the
1572 * max-heap.
1573 */
1575
1576 ReorderBufferFreeChange(rb, change, false);
1577 }
1578
1579 /* Update the memory counter */
1581
1582 /*
1583 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1584 * They are always stored in the toplevel transaction.
1585 */
1586 dlist_foreach_modify(iter, &txn->tuplecids)
1587 {
1588 ReorderBufferChange *change;
1589
1590 change = dlist_container(ReorderBufferChange, node, iter.cur);
1591
1592 /* Check we're not mixing changes from different transactions. */
1593 Assert(change->txn == txn);
1595
1596 ReorderBufferFreeChange(rb, change, true);
1597 }
1598
1599 /*
1600 * Cleanup the base snapshot, if set.
1601 */
1602 if (txn->base_snapshot != NULL)
1603 {
1606 }
1607
1608 /*
1609 * Cleanup the snapshot for the last streamed run.
1610 */
1611 if (txn->snapshot_now != NULL)
1612 {
1615 }
1616
1617 /*
1618 * Remove TXN from its containing lists.
1619 *
1620 * Note: if txn is known as subxact, we are deleting the TXN from its
1621 * parent's list of known subxacts; this leaves the parent's nsubtxns
1622 * count too high, but we don't care. Otherwise, we are deleting the TXN
1623 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1624 * list of catalog modifying transactions as well.
1625 */
1626 dlist_delete(&txn->node);
1628 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1629
1630 /* now remove reference from buffer */
1631 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1632 Assert(found);
1633
1634 /* remove entries spilled to disk */
1635 if (rbtxn_is_serialized(txn))
1637
1638 /* deallocate */
1640}
1641
1642/*
1643 * Discard changes from a transaction (and subtransactions), either after
1644 * streaming, decoding them at PREPARE, or detecting the transaction abort.
1645 * Keep the remaining info - transactions, tuplecids, invalidations and
1646 * snapshots.
1647 *
1648 * We additionally remove tuplecids after decoding the transaction at prepare
1649 * time as we only need to perform invalidation at rollback or commit prepared.
1650 *
1651 * 'txn_prepared' indicates that we have decoded the transaction at prepare
1652 * time.
1653 */
1654static void
1656{
1657 dlist_mutable_iter iter;
1658 Size mem_freed = 0;
1659
1660 /* cleanup subtransactions & their changes */
1661 dlist_foreach_modify(iter, &txn->subtxns)
1662 {
1664
1666
1667 /*
1668 * Subtransactions are always associated to the toplevel TXN, even if
1669 * they originally were happening inside another subtxn, so we won't
1670 * ever recurse more than one level deep here.
1671 */
1673 Assert(subtxn->nsubtxns == 0);
1674
1677 }
1678
1679 /* cleanup changes in the txn */
1680 dlist_foreach_modify(iter, &txn->changes)
1681 {
1682 ReorderBufferChange *change;
1683
1684 change = dlist_container(ReorderBufferChange, node, iter.cur);
1685
1686 /* Check we're not mixing changes from different transactions. */
1687 Assert(change->txn == txn);
1688
1689 /* remove the change from its containing list */
1690 dlist_delete(&change->node);
1691
1692 /*
1693 * Instead of updating the memory counter for individual changes, we
1694 * sum up the size of memory to free so we can update the memory
1695 * counter all together below. This saves costs of maintaining the
1696 * max-heap.
1697 */
1699
1700 ReorderBufferFreeChange(rb, change, false);
1701 }
1702
1703 /* Update the memory counter */
1705
1706 if (txn_prepared)
1707 {
1708 /*
1709 * If this is a prepared txn, cleanup the tuplecids we stored for
1710 * decoding catalog snapshot access. They are always stored in the
1711 * toplevel transaction.
1712 */
1713 dlist_foreach_modify(iter, &txn->tuplecids)
1714 {
1715 ReorderBufferChange *change;
1716
1717 change = dlist_container(ReorderBufferChange, node, iter.cur);
1718
1719 /* Check we're not mixing changes from different transactions. */
1720 Assert(change->txn == txn);
1722
1723 /* Remove the change from its containing list. */
1724 dlist_delete(&change->node);
1725
1726 ReorderBufferFreeChange(rb, change, true);
1727 }
1728 }
1729
1730 /*
1731 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1732 * memory. We could also keep the hash table and update it with new ctid
1733 * values, but this seems simpler and good enough for now.
1734 */
1735 if (txn->tuplecid_hash != NULL)
1736 {
1738 txn->tuplecid_hash = NULL;
1739 }
1740
1741 /* If this txn is serialized then clean the disk space. */
1742 if (rbtxn_is_serialized(txn))
1743 {
1746
1747 /*
1748 * We set this flag to indicate if the transaction is ever serialized.
1749 * We need this to accurately update the stats as otherwise the same
1750 * transaction can be counted as serialized multiple times.
1751 */
1753 }
1754
1755 /* also reset the number of entries in the transaction */
1756 txn->nentries_mem = 0;
1757 txn->nentries = 0;
1758}
1759
1760/*
1761 * Check the transaction status by CLOG lookup and discard all changes if
1762 * the transaction is aborted. The transaction status is cached in
1763 * txn->txn_flags so we can skip future changes and avoid CLOG lookups on the
1764 * next call.
1765 *
1766 * Return true if the transaction is aborted, otherwise return false.
1767 *
1768 * When the 'debug_logical_replication_streaming' is set to "immediate", we
1769 * don't check the transaction status, meaning the caller will always process
1770 * this transaction.
1771 */
1772static bool
1774{
1775 /* Quick return for regression tests */
1777 return false;
1778
1779 /*
1780 * Quick return if the transaction status is already known.
1781 */
1782
1783 if (rbtxn_is_committed(txn))
1784 return false;
1785 if (rbtxn_is_aborted(txn))
1786 {
1787 /* Already-aborted transactions should not have any changes */
1788 Assert(txn->size == 0);
1789
1790 return true;
1791 }
1792
1793 /* Otherwise, check the transaction status using CLOG lookup */
1794
1796 return false;
1797
1798 if (TransactionIdDidCommit(txn->xid))
1799 {
1800 /*
1801 * Remember the transaction is committed so that we can skip CLOG
1802 * check next time, avoiding the pressure on CLOG lookup.
1803 */
1804 Assert(!rbtxn_is_aborted(txn));
1806 return false;
1807 }
1808
1809 /*
1810 * The transaction aborted. We discard both the changes collected so far
1811 * and the toast reconstruction data. The full cleanup will happen as part
1812 * of decoding ABORT record of this transaction.
1813 */
1816
1817 /* All changes should be discarded */
1818 Assert(txn->size == 0);
1819
1820 /*
1821 * Mark the transaction as aborted so we can ignore future changes of this
1822 * transaction.
1823 */
1826
1827 return true;
1828}
1829
1830/*
1831 * Build a hash with a (relfilelocator, ctid) -> (cmin, cmax) mapping for use by
1832 * HeapTupleSatisfiesHistoricMVCC.
1833 */
1834static void
1836{
1837 dlist_iter iter;
1839
1841 return;
1842
1844 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
1845 hash_ctl.hcxt = rb->context;
1846
1847 /*
1848 * create the hash with the exact number of to-be-stored tuplecids from
1849 * the start
1850 */
1851 txn->tuplecid_hash =
1852 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1854
1855 dlist_foreach(iter, &txn->tuplecids)
1856 {
1859 bool found;
1860 ReorderBufferChange *change;
1861
1862 change = dlist_container(ReorderBufferChange, node, iter.cur);
1863
1865
1866 /* be careful about padding */
1867 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1868
1869 key.rlocator = change->data.tuplecid.locator;
1870
1872 &key.tid);
1873
1875 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1876 if (!found)
1877 {
1878 ent->cmin = change->data.tuplecid.cmin;
1879 ent->cmax = change->data.tuplecid.cmax;
1880 ent->combocid = change->data.tuplecid.combocid;
1881 }
1882 else
1883 {
1884 /*
1885 * Maybe we already saw this tuple before in this transaction, but
1886 * if so it must have the same cmin.
1887 */
1888 Assert(ent->cmin == change->data.tuplecid.cmin);
1889
1890 /*
1891 * cmax may be initially invalid, but once set it can only grow,
1892 * and never become invalid again.
1893 */
1894 Assert((ent->cmax == InvalidCommandId) ||
1895 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1896 (change->data.tuplecid.cmax > ent->cmax)));
1897 ent->cmax = change->data.tuplecid.cmax;
1898 }
1899 }
1900}
1901
1902/*
1903 * Copy a provided snapshot so we can modify it privately. This is needed so
1904 * that catalog modifying transactions can look into intermediate catalog
1905 * states.
1906 */
1907static Snapshot
1910{
1911 Snapshot snap;
1912 dlist_iter iter;
1913 int i = 0;
1914 Size size;
1915
1916 size = sizeof(SnapshotData) +
1917 sizeof(TransactionId) * orig_snap->xcnt +
1918 sizeof(TransactionId) * (txn->nsubtxns + 1);
1919
1920 snap = MemoryContextAllocZero(rb->context, size);
1921 memcpy(snap, orig_snap, sizeof(SnapshotData));
1922
1923 snap->copied = true;
1924 snap->active_count = 1; /* mark as active so nobody frees it */
1925 snap->regd_count = 0;
1926 snap->xip = (TransactionId *) (snap + 1);
1927
1928 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1929
1930 /*
1931 * snap->subxip contains all txids that belong to our transaction which we
1932 * need to check via cmin/cmax. That's why we store the toplevel
1933 * transaction in there as well.
1934 */
1935 snap->subxip = snap->xip + snap->xcnt;
1936 snap->subxip[i++] = txn->xid;
1937
1938 /*
1939 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1940 * manually. Since it's an upper boundary it is safe to use it for the
1941 * allocation above.
1942 */
1943 snap->subxcnt = 1;
1944
1945 dlist_foreach(iter, &txn->subtxns)
1946 {
1948
1950 snap->subxip[i++] = sub_txn->xid;
1951 snap->subxcnt++;
1952 }
1953
1954 /* sort so we can bsearch() later */
1955 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1956
1957 /* store the specified current CommandId */
1958 snap->curcid = cid;
1959
1960 return snap;
1961}
1962
1963/*
1964 * Free a previously ReorderBufferCopySnap'ed snapshot
1965 */
1966static void
1968{
1969 if (snap->copied)
1970 pfree(snap);
1971 else
1973}
1974
1975/*
1976 * If the transaction was (partially) streamed, we need to prepare or commit
1977 * it in a 'streamed' way. That is, we first stream the remaining part of the
1978 * transaction, and then invoke stream_prepare or stream_commit message as per
1979 * the case.
1980 */
1981static void
1983{
1984 /* we should only call this for previously streamed transactions */
1986
1988
1989 if (rbtxn_is_prepared(txn))
1990 {
1991 /*
1992 * Note, we send stream prepare even if a concurrent abort is
1993 * detected. See DecodePrepare for more information.
1994 */
1996 rb->stream_prepare(rb, txn, txn->final_lsn);
1998
1999 /*
2000 * This is a PREPARED transaction, part of a two-phase commit. The
2001 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2002 * just truncate txn by removing changes and tuplecids.
2003 */
2004 ReorderBufferTruncateTXN(rb, txn, true);
2005 /* Reset the CheckXidAlive */
2007 }
2008 else
2009 {
2010 rb->stream_commit(rb, txn, txn->final_lsn);
2012 }
2013}
2014
2015/*
2016 * Set xid to detect concurrent aborts.
2017 *
2018 * While streaming an in-progress transaction or decoding a prepared
2019 * transaction there is a possibility that the (sub)transaction might get
2020 * aborted concurrently. In such a case, if the (sub)transaction has a catalog
2021 * update, then we might decode the tuple using the wrong catalog version. For
2022 * example, suppose there is one catalog tuple with (xmin: 500, xmax: 0). Now,
2023 * the transaction 501 updates the catalog tuple and after that we will have
2024 * two tuples (xmin: 500, xmax: 501) and (xmin: 501, xmax: 0). Now, if 501 is
2025 * aborted and some other transaction say 502 updates the same catalog tuple
2026 * then the first tuple will be changed to (xmin: 500, xmax: 502). So, the
2027 * problem is that when we try to decode the tuple inserted/updated in 501
2028 * after the catalog update, we will see the catalog tuple with (xmin: 500,
2029 * xmax: 502) as visible because it will consider that the tuple is deleted by
2030 * xid 502 which is not visible to our snapshot. And when we will try to
2031 * decode with that catalog tuple, it can lead to a wrong result or a crash.
2032 * So, it is necessary to detect concurrent aborts to allow streaming of
2033 * in-progress transactions or decoding of prepared transactions.
2034 *
2035 * For detecting the concurrent abort we set CheckXidAlive to the current
2036 * (sub)transaction's xid to which this change belongs. And, during
2037 * catalog scan we can check the status of the xid and if it is aborted we will
2038 * report a specific error so that we can stop streaming current transaction
2039 * and discard the already streamed changes on such an error. We might have
2040 * already streamed some of the changes for the aborted (sub)transaction, but
2041 * that is fine because when we decode the abort we will stream abort message
2042 * to truncate the changes in the subscriber. Similarly, for prepared
2043 * transactions, we stop decoding if concurrent abort is detected and then
2044 * rollback the changes when rollback prepared is encountered. See
2045 * DecodePrepare.
2046 */
2047static inline void
2049{
2050 /*
2051 * If the input transaction id is already set as CheckXidAlive then
2052 * there is nothing to do.
2053 */
2055 return;
2056
2057 /*
2058 * Set up CheckXidAlive if the xid is not committed yet. We don't check if
2059 * the xid is aborted. That will happen during catalog access.
2060 */
2061 if (!TransactionIdDidCommit(xid))
2062 CheckXidAlive = xid;
2063 else
2065}
2066
2067/*
2068 * Helper function for ReorderBufferProcessTXN for applying change.
2069 */
2070static inline void
2072 Relation relation, ReorderBufferChange *change,
2073 bool streaming)
2074{
2075 if (streaming)
2076 rb->stream_change(rb, txn, relation, change);
2077 else
2078 rb->apply_change(rb, txn, relation, change);
2079}
2080
2081/*
2082 * Helper function for ReorderBufferProcessTXN for applying the truncate.
2083 */
2084static inline void
2086 int nrelations, Relation *relations,
2087 ReorderBufferChange *change, bool streaming)
2088{
2089 if (streaming)
2090 rb->stream_truncate(rb, txn, nrelations, relations, change);
2091 else
2092 rb->apply_truncate(rb, txn, nrelations, relations, change);
2093}
2094
2095/*
2096 * Helper function for ReorderBufferProcessTXN for applying the message.
 *
 * Hands the change's LSN together with the message prefix, size and payload
 * to rb->stream_message when streaming is true, and to rb->message
 * otherwise.
 *
 * NOTE(review): both calls pass a literal "true" as the fourth argument;
 * presumably this flags the message as transactional -- confirm against the
 * callback signature.
2097 */
2098static inline void
2100 ReorderBufferChange *change, bool streaming)
2101{
2102 if (streaming)
2103 rb->stream_message(rb, txn, change->lsn, true,
2104 change->data.msg.prefix,
2105 change->data.msg.message_size,
2106 change->data.msg.message);
2107 else
2108 rb->message(rb, txn, change->lsn, true,
2109 change->data.msg.prefix,
2110 change->data.msg.message_size,
2111 change->data.msg.message);
2112}
2113
2114/*
2115 * Function to store the command id and snapshot at the end of the current
2116 * stream so that we can reuse the same while sending the next stream.
 *
 * The command id is written to txn->command_id and the snapshot to
 * txn->snapshot_now.
2117 */
2118static inline void
2120 Snapshot snapshot_now, CommandId command_id)
2121{
2122 txn->command_id = command_id;
2123
2124 /*
 * Avoid copying if it's already copied: a snapshot flagged as copied is
 * stored as-is, otherwise a copy made by ReorderBufferCopySnap is stored.
 */
2125 if (snapshot_now->copied)
2126 txn->snapshot_now = snapshot_now;
2127 else
2128 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2129 txn, command_id);
2130}
2131
2132/*
2133 * Mark the given transaction as streamed if it's a top-level transaction
2134 * or has changes.
2135 */
2136static void
2138{
2139 /*
2140 * The top-level transaction is always marked as streamed, even if it
2141 * does not contain any changes (that is, when all the changes are in
2142 * subtransactions).
2143 *
2144 * For subtransactions, we only mark them as streamed when there are
2145 * changes in them.
2146 *
2147 * We do it this way because of aborts - we don't want to send aborts for
2148 * XIDs the downstream is not aware of. And of course, it always knows
2149 * about the top-level xact (we send the XID in all messages), but we
2150 * never stream XIDs of empty subxacts.
2151 */
2152 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2154}
2155
2156/*
2157 * Helper function for ReorderBufferProcessTXN to handle the concurrent
2158 * abort of the streaming transaction. This resets the TXN such that it
2159 * can be used to stream the remaining data of transaction being processed.
2160 * This can happen when the subtransaction is aborted and we still want to
2161 * continue processing the main or other subtransactions' data.
2162 */
2163static void
2165 Snapshot snapshot_now,
2166 CommandId command_id,
2167 XLogRecPtr last_lsn,
2169{
2170 /* Discard the changes that we just streamed */
2172
2173 /* Free all resources allocated for toast reconstruction */
2175
2176 /* Return the spec insert change if it is not NULL */
2177 if (specinsert != NULL)
2178 {
2180 specinsert = NULL;
2181 }
2182
2183 /*
2184 * For the streaming case, stop the stream and remember the command ID and
2185 * snapshot for the streaming run.
2186 */
2187 if (rbtxn_is_streamed(txn))
2188 {
2189 rb->stream_stop(rb, txn, last_lsn);
2190 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2191 }
2192
2193 /* All changes must be deallocated */
2194 Assert(txn->size == 0);
2195}
2196
2197/*
2198 * Helper function for ReorderBufferReplay and ReorderBufferStreamTXN.
2199 *
2200 * Send data of a transaction (and its subtransactions) to the
2201 * output plugin. We iterate over the top and subtransactions (using a k-way
2202 * merge) and replay the changes in lsn order.
2203 *
2204 * If streaming is true then data will be sent using stream API.
2205 *
2206 * Note: "volatile" markers on some parameters are to avoid trouble with
2207 * PG_TRY inside the function.
2208 */
2209static void
2211 XLogRecPtr commit_lsn,
2212 volatile Snapshot snapshot_now,
2213 volatile CommandId command_id,
2214 bool streaming)
2215{
2216 bool using_subtxn;
2222 volatile bool stream_started = false;
2223 ReorderBufferTXN *volatile curtxn = NULL;
2224
2225 /* build data to be able to lookup the CommandIds of catalog tuples */
2227
2228 /* setup the initial snapshot */
2229 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2230
2231 /*
2232 * Decoding needs access to syscaches et al., which in turn use
2233 * heavyweight locks and such. Thus we need to have enough state around to
2234 * keep track of those. The easiest way is to simply use a transaction
2235 * internally. That also allows us to easily enforce that nothing writes
2236 * to the database by checking for xid assignments.
2237 *
2238 * When we're called via the SQL SRF there's already a transaction
2239 * started, so start an explicit subtransaction there.
2240 */
2242
2243 PG_TRY();
2244 {
2245 ReorderBufferChange *change;
2246 int changes_count = 0; /* used to accumulate the number of
2247 * changes */
2248
2249 if (using_subtxn)
2250 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2251 else
2253
2254 /*
2255 * We only need to send begin/begin-prepare for non-streamed
2256 * transactions.
2257 */
2258 if (!streaming)
2259 {
2260 if (rbtxn_is_prepared(txn))
2261 rb->begin_prepare(rb, txn);
2262 else
2263 rb->begin(rb, txn);
2264 }
2265
2267 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2268 {
2269 Relation relation = NULL;
2270 Oid reloid;
2271
2273
2274 /*
2275 * We can't call start stream callback before processing first
2276 * change.
2277 */
2279 {
2280 if (streaming)
2281 {
2282 txn->origin_id = change->origin_id;
2283 rb->stream_start(rb, txn, change->lsn);
2284 stream_started = true;
2285 }
2286 }
2287
2288 /*
2289 * Enforce correct ordering of changes, merged from multiple
2290 * subtransactions. The changes may have the same LSN due to
2291 * MULTI_INSERT xlog records.
2292 */
2294
2295 prev_lsn = change->lsn;
2296
2297 /*
2298 * Set the current xid to detect concurrent aborts. This is
2299 * required for the cases when we decode the changes before the
2300 * COMMIT record is processed.
2301 */
2302 if (streaming || rbtxn_is_prepared(change->txn))
2303 {
2304 curtxn = change->txn;
2306 }
2307
2308 switch (change->action)
2309 {
2311
2312 /*
2313 * Confirmation for speculative insertion arrived. Simply
2314 * use as a normal record. It'll be cleaned up at the end
2315 * of INSERT processing.
2316 */
2317 if (specinsert == NULL)
2318 elog(ERROR, "invalid ordering of speculative insertion changes");
2319 Assert(specinsert->data.tp.oldtuple == NULL);
2320 change = specinsert;
2322
2323 /* intentionally fall through */
2328 Assert(snapshot_now);
2329
2330 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2331 change->data.tp.rlocator.relNumber);
2332
2333 /*
2334 * Mapped catalog tuple without data, emitted while
2335 * catalog table was in the process of being rewritten. We
2336 * can fail to look up the relfilenumber, because the
2337 * relmapper has no "historic" view, in contrast to the
2338 * normal catalog during decoding. Thus repeated rewrites
2339 * can cause a lookup failure. That's OK because we do not
2340 * decode catalog changes anyway. Normally such tuples
2341 * would be skipped over below, but we can't identify
2342 * whether the table should be logically logged without
2343 * mapping the relfilenumber to the oid.
2344 */
2345 if (reloid == InvalidOid &&
2346 change->data.tp.newtuple == NULL &&
2347 change->data.tp.oldtuple == NULL)
2348 goto change_done;
2349 else if (reloid == InvalidOid)
2350 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2351 relpathperm(change->data.tp.rlocator,
2352 MAIN_FORKNUM).str);
2353
2354 relation = RelationIdGetRelation(reloid);
2355
2356 if (!RelationIsValid(relation))
2357 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2358 reloid,
2359 relpathperm(change->data.tp.rlocator,
2360 MAIN_FORKNUM).str);
2361
2362 if (!RelationIsLogicallyLogged(relation))
2363 goto change_done;
2364
2365 /*
2366 * Ignore temporary heaps created during DDL unless the
2367 * plugin has asked for them.
2368 */
2369 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2370 goto change_done;
2371
2372 /*
2373 * For now ignore sequence changes entirely. Most of the
2374 * time they don't log changes using records we
2375 * understand, so it doesn't make sense to handle the few
2376 * cases we do.
2377 */
2378 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2379 goto change_done;
2380
2381 /* user-triggered change */
2382 if (!IsToastRelation(relation))
2383 {
2384 ReorderBufferToastReplace(rb, txn, relation, change);
2385 ReorderBufferApplyChange(rb, txn, relation, change,
2386 streaming);
2387
2388 /*
2389 * Only clear reassembled toast chunks if we're sure
2390 * they're not required anymore. The creator of the
2391 * tuple tells us.
2392 */
2393 if (change->data.tp.clear_toast_afterwards)
2395 }
2396 /* we're not interested in toast deletions */
2397 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2398 {
2399 /*
2400 * Need to reassemble the full toasted Datum in
2401 * memory. To ensure the chunks don't get reused till
2402 * we're done, remove it from the list of this
2403 * transaction's changes. Otherwise it will get
2404 * freed/reused while restoring spooled data from
2405 * disk.
2406 */
2407 Assert(change->data.tp.newtuple != NULL);
2408
2409 dlist_delete(&change->node);
2410 ReorderBufferToastAppendChunk(rb, txn, relation,
2411 change);
2412 }
2413
2415
2416 /*
2417 * If speculative insertion was confirmed, the record
2418 * isn't needed anymore.
2419 */
2420 if (specinsert != NULL)
2421 {
2423 specinsert = NULL;
2424 }
2425
2426 if (RelationIsValid(relation))
2427 {
2428 RelationClose(relation);
2429 relation = NULL;
2430 }
2431 break;
2432
2434
2435 /*
2436 * Speculative insertions are dealt with by delaying the
2437 * processing of the insert until the confirmation record
2438 * arrives. For that we simply unlink the record from the
2439 * chain, so it does not get freed/reused while restoring
2440 * spooled data from disk.
2441 *
2442 * This is safe in the face of concurrent catalog changes
2443 * because the relevant relation can't be changed between
2444 * speculative insertion and confirmation due to
2445 * CheckTableNotInUse() and locking.
2446 */
2447
2448 /* clear out a pending (and thus failed) speculation */
2449 if (specinsert != NULL)
2450 {
2452 specinsert = NULL;
2453 }
2454
2455 /* and memorize the pending insertion */
2456 dlist_delete(&change->node);
2457 specinsert = change;
2458 break;
2459
2461
2462 /*
2463 * Abort for speculative insertion arrived. So cleanup the
2464 * specinsert tuple and toast hash.
2465 *
2466 * Note that we get the spec abort change for each toast
2467 * entry but we need to perform the cleanup only the first
2468 * time we get it for the main table.
2469 */
2470 if (specinsert != NULL)
2471 {
2472 /*
2473 * We must clean the toast hash before processing a
2474 * completely new tuple to avoid confusion about the
2475 * previous tuple's toast chunks.
2476 */
2479
2480 /* We don't need this record anymore. */
2482 specinsert = NULL;
2483 }
2484 break;
2485
2487 {
2488 int i;
2489 int nrelids = change->data.truncate.nrelids;
2490 int nrelations = 0;
2491 Relation *relations;
2492
2493 relations = palloc0(nrelids * sizeof(Relation));
2494 for (i = 0; i < nrelids; i++)
2495 {
2496 Oid relid = change->data.truncate.relids[i];
2497 Relation rel;
2498
2499 rel = RelationIdGetRelation(relid);
2500
2501 if (!RelationIsValid(rel))
2502 elog(ERROR, "could not open relation with OID %u", relid);
2503
2504 if (!RelationIsLogicallyLogged(rel))
2505 continue;
2506
2507 relations[nrelations++] = rel;
2508 }
2509
2510 /* Apply the truncate. */
2512 relations, change,
2513 streaming);
2514
2515 for (i = 0; i < nrelations; i++)
2516 RelationClose(relations[i]);
2517
2518 break;
2519 }
2520
2522 ReorderBufferApplyMessage(rb, txn, change, streaming);
2523 break;
2524
2526 /* Execute the invalidation messages locally */
2528 change->data.inval.invalidations);
2529 break;
2530
2532 /* get rid of the old */
2534
2535 if (snapshot_now->copied)
2536 {
2537 ReorderBufferFreeSnap(rb, snapshot_now);
2538 snapshot_now =
2540 txn, command_id);
2541 }
2542
2543 /*
2544 * Restored from disk, need to be careful not to double
2545 * free. We could introduce refcounting for that, but for
2546 * now this seems infrequent enough not to care.
2547 */
2548 else if (change->data.snapshot->copied)
2549 {
2550 snapshot_now =
2552 txn, command_id);
2553 }
2554 else
2555 {
2556 snapshot_now = change->data.snapshot;
2557 }
2558
2559 /* and continue with the new one */
2560 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2561 break;
2562
2565
2566 if (command_id < change->data.command_id)
2567 {
2568 command_id = change->data.command_id;
2569
2570 if (!snapshot_now->copied)
2571 {
2572 /* we don't use the global one anymore */
2573 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2574 txn, command_id);
2575 }
2576
2577 snapshot_now->curcid = command_id;
2578
2580 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2581 }
2582
2583 break;
2584
2586 elog(ERROR, "tuplecid value in changequeue");
2587 break;
2588 }
2589
2590 /*
2591 * It is possible that the data is not sent to downstream for a
2592 * long time either because the output plugin filtered it or there
2593 * is a DDL that generates a lot of data that is not processed by
2594 * the plugin. So, in such cases, the downstream can timeout. To
2595 * avoid that we try to send a keepalive message if required.
2596 * Trying to send a keepalive message after every change has some
2597 * overhead, but testing showed there is no noticeable overhead if
2598 * we do it after every ~100 changes.
2599 */
2600#define CHANGES_THRESHOLD 100
2601
2603 {
2604 rb->update_progress_txn(rb, txn, prev_lsn);
2605 changes_count = 0;
2606 }
2607 }
2608
2609 /* speculative insertion record must be freed by now */
2611
2612 /* clean up the iterator */
2614 iterstate = NULL;
2615
2616 /*
2617 * Update total transaction count and total bytes processed by the
2618 * transaction and its subtransactions. Ensure to not count the
2619 * streamed transaction multiple times.
2620 *
2621 * Note that the statistics computation has to be done after
2622 * ReorderBufferIterTXNFinish as it releases the serialized change
2623 * which we have already accounted in ReorderBufferIterTXNNext.
2624 */
2625 if (!rbtxn_is_streamed(txn))
2626 rb->totalTxns++;
2627
2628 rb->totalBytes += txn->total_size;
2629
2630 /*
2631 * Done with current changes, send the last message for this set of
2632 * changes depending upon streaming mode.
2633 */
2634 if (streaming)
2635 {
2636 if (stream_started)
2637 {
2638 rb->stream_stop(rb, txn, prev_lsn);
2639 stream_started = false;
2640 }
2641 }
2642 else
2643 {
2644 /*
2645 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2646 * regular ones).
2647 */
2648 if (rbtxn_is_prepared(txn))
2649 {
2651 rb->prepare(rb, txn, commit_lsn);
2653 }
2654 else
2655 rb->commit(rb, txn, commit_lsn);
2656 }
2657
2658 /* this is just a sanity check against bad output plugin behaviour */
2660 elog(ERROR, "output plugin used XID %u",
2662
2663 /*
2664 * Remember the command ID and snapshot for the next set of changes in
2665 * streaming mode.
2666 */
2667 if (streaming)
2668 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2669 else if (snapshot_now->copied)
2670 ReorderBufferFreeSnap(rb, snapshot_now);
2671
2672 /* cleanup */
2674
2675 /*
2676 * Aborting the current (sub-)transaction as a whole has the right
2677 * semantics. We want all locks acquired in here to be released, not
2678 * reassigned to the parent and we do not want any database access
2679 * have persistent effects.
2680 */
2682
2683 /* make sure there's no cache pollution */
2685 {
2688 }
2689 else
2690 {
2694 }
2695
2696 if (using_subtxn)
2697 {
2700 CurrentResourceOwner = cowner;
2701 }
2702
2703 /*
2704 * We are here for one of four reasons: 1. Decoding an
2705 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2706 * prepared txn that was (partially) streamed. 4. Decoding a committed
2707 * txn.
2708 *
2709 * For 1, we allow truncation of txn data by removing the changes
2710 * already streamed but still keeping other things like invalidations,
2711 * snapshot, and tuplecids. For 2 and 3, we indicate
2712 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2713 * data as the entire transaction has been decoded except for commit.
2714 * For 4, as the entire txn has been decoded, we can fully clean up
2715 * the TXN reorder buffer.
2716 */
2717 if (streaming || rbtxn_is_prepared(txn))
2718 {
2719 if (streaming)
2721
2723 /* Reset the CheckXidAlive */
2725 }
2726 else
2728 }
2729 PG_CATCH();
2730 {
2733
2734 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2735 if (iterstate)
2737
2739
2740 /*
2741 * Force cache invalidation to happen outside of a valid transaction
2742 * to prevent catalog access as we just caught an error.
2743 */
2745
2746 /* make sure there's no cache pollution */
2748 {
2751 }
2752 else
2753 {
2757 }
2758
2759 if (using_subtxn)
2760 {
2763 CurrentResourceOwner = cowner;
2764 }
2765
2766 /*
2767 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2768 * abort of the (sub)transaction we are streaming or preparing. We
2769 * need to do the cleanup and return gracefully on this error, see
2770 * SetupCheckXidLive.
2771 *
2772 * This error code can be thrown by one of the callbacks we call
2773 * during decoding so we need to ensure that we return gracefully only
2774 * when we are sending the data in streaming mode and the streaming is
2775 * not finished yet or when we are sending the data out on a PREPARE
2776 * during a two-phase commit.
2777 */
2778 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2780 {
2781 /* curtxn must be set for streaming or prepared transactions */
2782 Assert(curtxn);
2783
2784 /* Cleanup the temporary error state. */
2787 errdata = NULL;
2788
2789 /* Remember the transaction is aborted. */
2791 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2792
2793 /* Mark the transaction is streamed if appropriate */
2794 if (stream_started)
2796
2797 /* Reset the TXN so that it is allowed to stream remaining data. */
2798 ReorderBufferResetTXN(rb, txn, snapshot_now,
2799 command_id, prev_lsn,
2800 specinsert);
2801 }
2802 else
2803 {
2806 PG_RE_THROW();
2807 }
2808 }
2809 PG_END_TRY();
2810}
2811
2812/*
2813 * Perform the replay of a transaction and its non-aborted subtransactions.
2814 *
2815 * Subtransactions must already have been processed by
2816 * ReorderBufferCommitChild(), even if they were previously assigned to the
2817 * toplevel transaction with ReorderBufferAssignChild().
2818 *
2819 * This interface is called once a prepare or toplevel commit is read for both
2820 * streamed as well as non-streamed transactions.
 *
 * The given LSNs, commit time and origin information are stored in txn
 * before anything else happens. A transaction without a base snapshot made
 * no changes, so there is nothing to decode and we return early.
2821 */
2822static void
2825 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2826 TimestampTz commit_time,
2827 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2828{
2829 Snapshot snapshot_now;
2830 CommandId command_id = FirstCommandId;
2831
2832 txn->final_lsn = commit_lsn;
2833 txn->end_lsn = end_lsn;
2834 txn->commit_time = commit_time;
2835 txn->origin_id = origin_id;
2836 txn->origin_lsn = origin_lsn;
2837
2838 /*
2839 * If the transaction was (partially) streamed, we need to commit it in a
2840 * 'streamed' way. That is, we first stream the remaining part of the
2841 * transaction, and then invoke stream_commit message.
2842 *
2843 * Called after everything (origin ID, LSN, ...) is stored in the
2844 * transaction to avoid passing that information directly.
2845 */
2846 if (rbtxn_is_streamed(txn))
2847 {
2849 return;
2850 }
2851
2852 /*
2853 * If this transaction has no snapshot, it didn't make any changes to the
2854 * database, so there's nothing to decode. Note that
2855 * ReorderBufferCommitChild will have transferred any snapshots from
2856 * subtransactions if there were any.
2857 */
2858 if (txn->base_snapshot == NULL)
2859 {
2860 Assert(txn->ninvalidations == 0);
2861
2862 /*
2863 * Removing this txn before a commit might result in the computation
2864 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2865 */
2866 if (!rbtxn_is_prepared(txn))
2868 return;
2869 }
2870
2871 snapshot_now = txn->base_snapshot;
2872
2873 /* Process and send the changes to output plugin. */
2874 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2875 command_id, false);
2876}
2877
2878/*
2879 * Commit a transaction.
2880 *
2881 * See comments for ReorderBufferReplay().
 *
 * If no transaction is known for xid there is nothing to replay and the
 * function returns without doing anything.
2882 */
2883void
2885 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
2886 TimestampTz commit_time,
2887 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2888{
2889 ReorderBufferTXN *txn;
2890
2891 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2892 false);
2893
2894 /* unknown transaction, nothing to replay */
2895 if (txn == NULL)
2896 return;
2897
2898 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2899 origin_id, origin_lsn);
2900}
2901
2902/*
2903 * Record the prepare information for a transaction. Also, mark the transaction
2904 * as a prepared transaction.
 *
 * Returns false if no transaction is known for xid, true otherwise.
2905 */
2906bool
2908 XLogRecPtr prepare_lsn, XLogRecPtr end_lsn,
2909 TimestampTz prepare_time,
2910 ReplOriginId origin_id, XLogRecPtr origin_lsn)
2911{
2912 ReorderBufferTXN *txn;
2913
2914 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2915
2916 /* unknown transaction, nothing to do */
2917 if (txn == NULL)
2918 return false;
2919
2920 /*
2921 * Remember the prepare information so that it can be used later by commit
2922 * prepared in case we skip doing the prepare.
2923 */
2924 txn->final_lsn = prepare_lsn;
2925 txn->end_lsn = end_lsn;
2926 txn->prepare_time = prepare_time;
2927 txn->origin_id = origin_id;
2928 txn->origin_lsn = origin_lsn;
2929
2930 /* Mark this transaction as a prepared transaction */
2933
2934 return true;
2935}
2936
2937/*
 * Remember that we have skipped prepare.
 *
 * Does nothing if no transaction is known for xid.
 */
2938void
2940{
2941 ReorderBufferTXN *txn;
2942
2943 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2944
2945 /* unknown transaction, nothing to do */
2946 if (txn == NULL)
2947 return;
2948
2949 /* txn must have been marked as a prepared transaction */
2952}
2953
2954/*
2955 * Prepare a two-phase transaction.
2956 *
2957 * See comments for ReorderBufferReplay().
 *
 * A copy of gid is stored in the transaction. The replay uses the prepare
 * information (LSNs, time, origin) already recorded in the transaction. If
 * no transaction is known for xid, nothing is done.
2958 */
2959void
2961 char *gid)
2962{
2963 ReorderBufferTXN *txn;
2964
2965 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2966 false);
2967
2968 /* unknown transaction, nothing to replay */
2969 if (txn == NULL)
2970 return;
2971
2972 /*
2973 * txn must have been marked as a prepared transaction and must have
2974 * neither been skipped nor sent a prepare. Also, the prepare info must
2975 * have been updated in it by now.
2976 */
2979
2980 txn->gid = pstrdup(gid);
2981
2982 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2983 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2984
2985 /*
2986 * Send a prepare if that has not been done already. This might occur if
2987 * we have detected a concurrent abort while replaying the non-streaming
2988 * transaction.
2989 */
2990 if (!rbtxn_sent_prepare(txn))
2991 {
2992 rb->prepare(rb, txn, txn->final_lsn);
2994 }
2995}
2996
2997/*
2998 * This is used to handle COMMIT/ROLLBACK PREPARED.
 *
 * is_commit selects between the commit_prepared and rollback_prepared
 * callbacks. The rollback callback is passed the end LSN and time of the
 * prepare, which are saved before the transaction's fields are overwritten
 * with the commit information. A copy of gid is stored in the transaction.
 * If no transaction is known for xid, nothing is done.
2999 */
3000void
3002 XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
3003 XLogRecPtr two_phase_at,
3004 TimestampTz commit_time, ReplOriginId origin_id,
3005 XLogRecPtr origin_lsn, char *gid, bool is_commit)
3006{
3007 ReorderBufferTXN *txn;
3008 XLogRecPtr prepare_end_lsn;
3009 TimestampTz prepare_time;
3010
3011 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3012
3013 /* unknown transaction, nothing to do */
3014 if (txn == NULL)
3015 return;
3016
3017 /*
3018 * By this time the txn has the prepare record information; remember it so
3019 * that it can be used later for rollback.
3020 */
3021 prepare_end_lsn = txn->end_lsn;
3022 prepare_time = txn->prepare_time;
3023
3024 /* add the gid in the txn */
3025 txn->gid = pstrdup(gid);
3026
3027 /*
3028 * It is possible that this transaction is not decoded at prepare time
3029 * either because by that time we didn't have a consistent snapshot, or
3030 * two_phase was not enabled, or it was decoded earlier but we have
3031 * restarted. We only need to send the prepare if it was not decoded
3032 * earlier. We don't need to decode the xact for aborts if it is not done
3033 * already.
3034 */
3035 if ((txn->final_lsn < two_phase_at) && is_commit)
3036 {
3037 /*
3038 * txn must have been marked as a prepared transaction and skipped but
3039 * not sent a prepare. Also, the prepare info must have been updated
3040 * in txn even if we skip prepare.
3041 */
3045
3046 /*
3047 * By this time the txn has the prepare record information and it is
3048 * important to use that so that downstream gets the accurate
3049 * information. If we passed the commit information here instead, the
3050 * downstream could behave as if it had already replayed the commit
3051 * prepared after the restart.
3052 */
3053 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3054 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3055 }
3056
3057 txn->final_lsn = commit_lsn;
3058 txn->end_lsn = end_lsn;
3059 txn->commit_time = commit_time;
3060 txn->origin_id = origin_id;
3061 txn->origin_lsn = origin_lsn;
3062
3063 if (is_commit)
3064 rb->commit_prepared(rb, txn, commit_lsn);
3065 else
3066 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3067
3068 /* cleanup: make sure there's no cache pollution */
3070 txn->invalidations);
3072}
3073
3074/*
3075 * Abort a transaction that possibly has previous changes. Needs to be first
3076 * called for subtransactions and then for the toplevel xid.
3077 *
3078 * NB: Transactions handled here have to have actively aborted (i.e. have
3079 * produced an abort record). Implicitly aborted transactions are handled via
3080 * ReorderBufferAbortOld(); transactions we're just not interested in, but
3081 * which have committed are handled in ReorderBufferForget().
3082 *
3083 * This function purges this transaction and its contents from memory and
3084 * disk.
 *
 * For streamed transactions the stream_abort callback is invoked first. If
 * no transaction is known for xid, nothing is done.
3085 */
3086void
3088 TimestampTz abort_time)
3089{
3090 ReorderBufferTXN *txn;
3091
3092 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3093 false);
3094
3095 /* unknown, nothing to remove */
3096 if (txn == NULL)
3097 return;
3098
3099 txn->abort_time = abort_time;
3100
3101 /* For streamed transactions notify the remote node about the abort. */
3102 if (rbtxn_is_streamed(txn))
3103 {
3104 rb->stream_abort(rb, txn, lsn);
3105
3106 /*
3107 * We might have decoded changes for this transaction that could load
3108 * the cache as per the current transaction's view (consider DDL that
3109 * happened in this transaction). We don't want the decoding of future
3110 * transactions to use those cache entries so execute only the inval
3111 * messages in this transaction.
3112 */
3113 if (txn->ninvalidations > 0)
3115 txn->invalidations);
3116 }
3117
3118 /* cosmetic... */
3119 txn->final_lsn = lsn;
3120
3121 /* remove potential on-disk data, and deallocate */
3123}
3124
3125/*
3126 * Abort all transactions that aren't actually running anymore because the
3127 * server restarted.
3128 *
3129 * NB: These really have to be transactions that have aborted due to a server
3130 * crash/immediate restart, as we don't deal with invalidations here.
 *
 * Toplevel transactions whose xid precedes oldestRunningXid are aborted.
3131 */
3132void
3134{
3136
3137 /*
3138 * Iterate through all (potential) toplevel TXNs and abort all that are
3139 * older than what possibly can be running. We stop once we've found the
3140 * first that is alive. There might be some that acquired an xid earlier
3141 * but started writing later, but that's unlikely and they will be cleaned
3142 * up in a later call to this function.
3143 */
3144 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3145 {
3146 ReorderBufferTXN *txn;
3147
3148 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3149
3150 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3151 {
3152 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3153
3154 /* Notify the remote node about the crash/immediate restart. */
3155 if (rbtxn_is_streamed(txn))
3156 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3157
3158 /* remove potential on-disk data, and deallocate this tx */
3160 }
3161 else
3162 return;
3163 }
3164}
3165
3166/*
3167 * Forget the contents of a transaction if we aren't interested in its
3168 * contents. Needs to be first called for subtransactions and then for the
3169 * toplevel xid.
3170 *
3171 * This is significantly different from ReorderBufferAbort() because
3172 * transactions that have committed need to be treated differently from aborted
3173 * ones since they may have modified the catalog.
3174 *
3175 * Note that this is only allowed to be called at the moment a transaction
3176 * commit has just been read, not earlier; otherwise later records referring
3177 * to this xid might re-create the transaction incompletely.
 *
 * If no transaction is known for xid, nothing is done.
3178 */
3179void
3181{
3182 ReorderBufferTXN *txn;
3183
3184 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3185 false);
3186
3187 /* unknown, nothing to forget */
3188 if (txn == NULL)
3189 return;
3190
3191 /* this transaction mustn't be streamed */
3193
3194 /* cosmetic... */
3195 txn->final_lsn = lsn;
3196
3197 /*
3198 * Process only cache invalidation messages in this transaction if there
3199 * are any. Even if we're not interested in the transaction's contents, it
3200 * could have manipulated the catalog and we need to update the caches
3201 * according to that.
3202 */
3203 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3205 txn->invalidations);
3206 else
3207 Assert(txn->ninvalidations == 0);
3208
3209 /* remove potential on-disk data, and deallocate */
3211}
3212
3213/*
3214 * Invalidate cache for those transactions that need to be skipped just in case
3215 * catalogs were manipulated as part of the transaction.
3216 *
3217 * Note that this is a special-purpose function for prepared transactions where
3218 * we don't want to clean up the TXN even when we decide to skip it. See
3219 * DecodePrepare.
 *
 * If no transaction is known for xid, nothing is done.
3220 */
3221void
3223{
3224 ReorderBufferTXN *txn;
3225
3226 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3227 false);
3228
3229 /* unknown, nothing to do */
3230 if (txn == NULL)
3231 return;
3232
3233 /*
3234 * Process cache invalidation messages if there are any. Even if we're not
3235 * interested in the transaction's contents, it could have manipulated the
3236 * catalog and we need to update the caches according to that.
3237 */
3238 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3240 txn->invalidations);
3241 else
3242 Assert(txn->ninvalidations == 0);
3243}
3244
3245
3246/*
3247 * Execute invalidations happening outside the context of a decoded
3248 * transaction. That currently happens either for xid-less commits
3249 * (cf. RecordTransactionCommit()) or for invalidations in uninteresting
3250 * transactions (via ReorderBufferForget()).
 *
 * Each of the ninvalidations messages in the invalidations array is passed
 * to LocalExecuteInvalidationMessage().
3251 */
3252void
3254 SharedInvalidationMessage *invalidations)
3255{
3259 int i;
3260
3261 if (use_subtxn)
3263
3264 /*
3265 * Force invalidations to happen outside of a valid transaction - that way
3266 * entries will just be marked as invalid without accessing the catalog.
3267 * That's advantageous because we don't need to set up the full state
3268 * necessary for catalog access.
3269 */
3270 if (use_subtxn)
3272
3273 for (i = 0; i < ninvalidations; i++)
3274 LocalExecuteInvalidationMessage(&invalidations[i]);
3275
3276 if (use_subtxn)
3277 {
3280 CurrentResourceOwner = cowner;
3281 }
3282}
3283
3284/*
3285 * Tell reorderbuffer about an xid seen in the WAL stream. Has to be called at
3286 * least once for every xid in XLogRecord->xl_xid (xids found in other places
3287 * in records may, but do not have to, be passed through here).
3288 *
3289 * Reorderbuffer keeps some data structures about transactions in LSN order,
3290 * for efficiency. To do that it has to know about when transactions are seen
3291 * first in the WAL. As many types of records are not actually interesting for
3292 * logical decoding, they do not necessarily pass through here.
 *
 * Nothing is done if xid is InvalidTransactionId.
3293 */
3294void
3296{
3297 /* many records won't have an xid assigned, centralize check here */
3298 if (xid != InvalidTransactionId)
3299 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3300}
3301
3302/*
3303 * Add a new snapshot to this transaction that may only be used after lsn 'lsn'
3304 * because the previous snapshot doesn't describe the catalog correctly for
3305 * following rows.
 *
 * The snapshot is stored in a change that is queued for transaction xid.
3306 */
3307void
3310{
3312
3313 change->data.snapshot = snap;
3315
3316 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3317}
3318
3319/*
3320 * Set up the transaction's base snapshot.
3321 *
3322 * If we know that xid is a subtransaction, set the base snapshot on the
3323 * top-level transaction instead.
 *
 * snap must not be NULL and the target transaction must not have a base
 * snapshot yet (both are asserted). The transaction is appended to
 * rb->txns_by_base_snapshot_lsn.
3324 */
3325void
3328{
3329 ReorderBufferTXN *txn;
3330 bool is_new;
3331
3332 Assert(snap != NULL);
3333
3334 /*
3335 * Fetch the transaction to operate on. If we know it's a subtransaction,
3336 * operate on its top-level transaction instead.
3337 */
3338 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3339 if (rbtxn_is_known_subxact(txn))
3340 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3341 NULL, InvalidXLogRecPtr, false);
3342 Assert(txn->base_snapshot == NULL);
3343
3344 txn->base_snapshot = snap;
3345 txn->base_snapshot_lsn = lsn;
3346 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3347
3349}
3350
3351/*
3352 * Access the catalog with this CommandId at this point in the changestream.
3353 *
3354 * May only be called for command ids > 1.
 *
 * The command id is stored in a change that is queued for transaction xid.
3355 */
3356void
3359{
3361
3362 change->data.command_id = cid;
3364
3365 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3366}
3367
3368/*
3369 * Update memory counters to account for the new or removed change.
3370 *
3371 * We update two counters - in the reorder buffer, and in the transaction
3372 * containing the change. The reorder buffer counter allows us to quickly
3373 * decide if we reached the memory limit, the transaction counter allows
3374 * us to quickly pick the largest transaction for eviction.
3375 *
3376 * At least one of txn and change must be non-NULL. We update the memory
3377 * counter of txn if it's non-NULL, otherwise change->txn.
3378 *
3379 * When streaming is enabled, we need to update the toplevel transaction
3380 * counters instead - we don't really care about subtransactions as we
3381 * can't stream them individually anyway, and we only pick toplevel
3382 * transactions for eviction. So only toplevel transactions matter.
 *
 * 'addition' says whether sz is added to or subtracted from the counters.
 * A zero sz and tuple CID changes are ignored.
3383 */
3384static void
3386 ReorderBufferChange *change,
3387 ReorderBufferTXN *txn,
3388 bool addition, Size sz)
3389{
3390 ReorderBufferTXN *toptxn;
3391
3392 Assert(txn || change);
3393
3394 /*
3395 * Ignore tuple CID changes, because those are not evicted when reaching
3396 * memory limit. So we just don't count them, because it might easily
3397 * trigger a pointless attempt to spill.
3398 */
3399 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3400 return;
3401
3402 if (sz == 0)
3403 return;
3404
3405 if (txn == NULL)
3406 txn = change->txn;
3407 Assert(txn != NULL);
3408
3409 /*
3410 * Update the total size in top level as well. This is later used to
3411 * compute the decoding stats.
3412 */
3413 toptxn = rbtxn_get_toptxn(txn);
3414
3415 if (addition)
3416 {
3417 Size oldsize = txn->size;
3418
3419 txn->size += sz;
3420 rb->size += sz;
3421
3422 /* Update the total size in the top transaction. */
3423 toptxn->total_size += sz;
3424
3425 /* Update the max-heap; txn is only in it if its size was non-zero */
3426 if (oldsize != 0)
3427 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3428 pairingheap_add(rb->txn_heap, &txn->txn_node);
3429 }
3430 else
3431 {
3432 Assert((rb->size >= sz) && (txn->size >= sz));
3433 txn->size -= sz;
3434 rb->size -= sz;
3435
3436 /* Update the total size in the top transaction. */
3437 toptxn->total_size -= sz;
3438
3439 /* Update the max-heap; only re-add txn if its size is still non-zero */
3440 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3441 if (txn->size != 0)
3442 pairingheap_add(rb->txn_heap, &txn->txn_node);
3443 }
3444
3445 Assert(txn->size <= rb->size);
3446}
3447
3448/*
3449 * Add new (relfilelocator, tid) -> (cmin, cmax) mappings.
3450 *
3451 * We do not include this change type in memory accounting, because we
3452 * keep CIDs in a separate list and do not evict them when reaching
3453 * the memory limit.
 *
 * The change is appended to txn->tuplecids and counted in txn->ntuplecids.
3454 */
3455void
3457 XLogRecPtr lsn, RelFileLocator locator,
3458 ItemPointerData tid, CommandId cmin,
3459 CommandId cmax, CommandId combocid)
3460{
3462 ReorderBufferTXN *txn;
3463
3464 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3465
3466 change->data.tuplecid.locator = locator;
3467 change->data.tuplecid.tid = tid;
3468 change->data.tuplecid.cmin = cmin;
3469 change->data.tuplecid.cmax = cmax;
3470 change->data.tuplecid.combocid = combocid;
3471 change->lsn = lsn;
3472 change->txn = txn;
3474
3475 dlist_push_tail(&txn->tuplecids, &change->node);
3476 txn->ntuplecids++;
3477}
3478
3479/*
3480 * Add new invalidation messages to the reorder buffer queue.
 *
 * The nmsgs messages in msgs are copied into a newly allocated change, which
 * is then queued for transaction xid.
3481 */
3482static void
3484 XLogRecPtr lsn, Size nmsgs,
3486{
3487 ReorderBufferChange *change;
3488
3489 change = ReorderBufferAllocChange(rb);
3491 change->data.inval.ninvalidations = nmsgs;
3493 memcpy(change->data.inval.invalidations, msgs,
3494 sizeof(SharedInvalidationMessage) * nmsgs);
3495
3496 ReorderBufferQueueChange(rb, xid, lsn, change, false);
3497}
3498
3499/*
3500 * A helper function for ReorderBufferAddInvalidations() and
3501 * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
3502 * messages into **invals_out.
 *
 * The case of *ninvals_out == 0 is handled separately; otherwise the existing
 * array is enlarged to hold (*ninvals_out + nmsgs_new) messages.
3503 */
3504static void
3509{
3510 if (*ninvals_out == 0)
3511 {
3515 }
3516 else
3517 {
3518 /* Enlarge the array of inval messages */
3521 (*ninvals_out + nmsgs_new));
3525 }
3526}
3527
3528/*
3529 * Accumulate the invalidations for executing them later.
3530 *
3531 * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
3532 * accumulates all the invalidation messages in the toplevel transaction, if
3533 * available, otherwise in the current transaction, as well as in the form of
3534 * a change in the reorder buffer. We need to record them in the form of a
3535 * change so that we can execute only the required invalidations instead of
3536 * executing all the invalidations on each CommandId increment. We also need
3537 * to accumulate these in the txn buffer because in some cases where we skip
3538 * processing the transaction (see ReorderBufferForget), we need to execute
3539 * all the invalidations together.
 *
 * nmsgs must be greater than zero (asserted). The work is done with
 * rb->context as the current memory context.
3540 */
3541void
3543 XLogRecPtr lsn, Size nmsgs,
3545{
3546 ReorderBufferTXN *txn;
3547 MemoryContext oldcontext;
3548
3549 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3550
3551 oldcontext = MemoryContextSwitchTo(rb->context);
3552
3553 /*
3554 * Collect all the invalidations under the top transaction, if available,
3555 * so that we can execute them all together. See comments atop this
3556 * function.
3557 */
3558 txn = rbtxn_get_toptxn(txn);
3559
3560 Assert(nmsgs > 0);
3561
3563 &txn->ninvalidations,
3564 msgs, nmsgs);
3565
3566 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3567
3568 MemoryContextSwitchTo(oldcontext);
3569}
3570
3571/*
3572 * Accumulate the invalidations distributed by other committed transactions
3573 * for executing them later.
3574 *
3575 * This function is similar to ReorderBufferAddInvalidations() but stores
3576 * the given inval messages in txn->invalidations_distributed, with an
3577 * overflow check.
3578 *
3579 * This needs to be called by committed transactions to distribute their
3580 * inval messages to in-progress transactions.
 *
 * nmsgs must be greater than zero (asserted). The messages are also queued
 * as a change of the transaction.
3581 */
3582void
3584 XLogRecPtr lsn, Size nmsgs,
3586{
3587 ReorderBufferTXN *txn;
3588 MemoryContext oldcontext;
3589
3590 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3591
3592 oldcontext = MemoryContextSwitchTo(rb->context);
3593
3594 /*
3595 * Collect all the invalidations under the top transaction, if available,
3596 * so that we can execute them all together. See the comments atop
3597 * ReorderBufferAddInvalidations().
3598 */
3599 txn = rbtxn_get_toptxn(txn);
3600
3601 Assert(nmsgs > 0);
3602
3604 {
3605 /*
3606 * Check the transaction has enough space for storing distributed
3607 * invalidation messages.
3608 */
3610 {
3611 /*
3612 * Mark the invalidation message as overflowed and free up the
3613 * messages accumulated so far.
3614 */
3616
3618 {
3622 }
3623 }
3624 else
3627 msgs, nmsgs);
3628 }
3629
3630 /* Queue the invalidation messages into the transaction */
3631 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3632
3633 MemoryContextSwitchTo(oldcontext);
3634}
3635
3636/*
3637 * Apply all invalidations we know. Possibly we only need parts at this point
3638 * in the changestream but we don't know which those are.
3639 */
3640static void
3642{
3643 int i;
3644
3645 for (i = 0; i < nmsgs; i++)
3647}
3648
3649/*
3650 * Mark a transaction as containing catalog changes
3651 */
3652void
3654 XLogRecPtr lsn)
3655{
3656 ReorderBufferTXN *txn;
3657
3658 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3659
3660 if (!rbtxn_has_catalog_changes(txn))
3661 {
3663 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3664 }
3665
3666 /*
3667 * Mark the top-level transaction as having catalog changes too if one of
3668 * its children has them, so that ReorderBufferBuildTupleCidHash can
3669 * conveniently check just the top-level transaction and decide whether to
3670 * build the hash table or not.
3671 */
3672 if (rbtxn_is_subtxn(txn))
3673 {
3674 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3675
3676 if (!rbtxn_has_catalog_changes(toptxn))
3677 {
3679 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3680 }
3681 }
3682}
3683
3684/*
3685 * Return palloc'ed array of the transactions that have changed catalogs.
3686 * The returned array is sorted in xidComparator order.
3687 *
3688 * The caller must free the returned array when done with it.
3689 */
3692{
3693 dlist_iter iter;
3694 TransactionId *xids = NULL;
3695 size_t xcnt = 0;
3696
3697 /* Quick return if the list is empty */
3698 if (dclist_count(&rb->catchange_txns) == 0)
3699 return NULL;
3700
3701 /* Initialize XID array */
3702 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3703 dclist_foreach(iter, &rb->catchange_txns)
3704 {
3706 catchange_node,
3707 iter.cur);
3708
3710
3711 xids[xcnt++] = txn->xid;
3712 }
3713
3714 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3715
3716 Assert(xcnt == dclist_count(&rb->catchange_txns));
3717 return xids;
3718}
3719
3720/*
3721 * Query whether a transaction is already *known* to contain catalog
3722 * changes. This can be wrong until directly before the commit!
3723 */
3724bool
3726{
3727 ReorderBufferTXN *txn;
3728
3729 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3730 false);
3731 if (txn == NULL)
3732 return false;
3733
3734 return rbtxn_has_catalog_changes(txn);
3735}
3736
3737/*
3738 * ReorderBufferXidHasBaseSnapshot
3739 * Have we already set the base snapshot for the given txn/subtxn?
3740 */
3741bool
3743{
3744 ReorderBufferTXN *txn;
3745
3746 txn = ReorderBufferTXNByXid(rb, xid, false,
3747 NULL, InvalidXLogRecPtr, false);
3748
3749 /* transaction isn't known yet, ergo no snapshot */
3750 if (txn == NULL)
3751 return false;
3752
3753 /* a known subtxn? operate on top-level txn instead */
3754 if (rbtxn_is_known_subxact(txn))
3755 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3756 NULL, InvalidXLogRecPtr, false);
3757
3758 return txn->base_snapshot != NULL;
3759}
3760
3761
3762/*
3763 * ---------------------------------------
3764 * Disk serialization support
3765 * ---------------------------------------
3766 */
3767
3768/*
3769 * Ensure the IO buffer is >= sz.
3770 */
3771static void
3773{
3774 if (!rb->outbufsize)
3775 {
3776 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3777 rb->outbufsize = sz;
3778 }
3779 else if (rb->outbufsize < sz)
3780 {
3781 rb->outbuf = repalloc(rb->outbuf, sz);
3782 rb->outbufsize = sz;
3783 }
3784}
3785
3786
3787/* Compare two transactions by size */
3788static int
3790{
3793
3794 if (ta->size < tb->size)
3795 return -1;
3796 if (ta->size > tb->size)
3797 return 1;
3798 return 0;
3799}
3800
3801/*
3802 * Find the largest transaction (toplevel or subxact) to evict (spill to disk).
3803 */
3804static ReorderBufferTXN *
3806{
3808
3809 /* Get the largest transaction from the max-heap */
3811 pairingheap_first(rb->txn_heap));
3812
3813 Assert(largest);
3814 Assert(largest->size > 0);
3815 Assert(largest->size <= rb->size);
3816
3817 return largest;
3818}
3819
3820/*
3821 * Find the largest streamable (and non-aborted) toplevel transaction to evict
3822 * (by streaming).
3823 *
3824 * This can be seen as an optimized version of ReorderBufferLargestTXN, which
3825 * should give us the same transaction (because we don't update the memory
3826 * accounting for subtransactions when streaming, so it's always 0). But we can
3827 * simply iterate over the limited number of toplevel transactions that have a
3828 * base snapshot. There is no point in selecting a transaction without a base
3829 * snapshot because we don't decode such transactions. Also, we do not select
3830 * a transaction that doesn't have any streamable change.
3831 *
3832 * Note that we skip transactions that contain incomplete changes. There
3833 * is scope for optimization here: we could also select the largest
3834 * transaction which has incomplete changes. But that would make the code and
3835 * design quite complex, and that might not be worth the benefit. If we plan to
3836 * stream the transactions that contain incomplete changes then we need to
3837 * find a way to partially stream/truncate the transaction changes in-memory
3838 * and build a mechanism to partially truncate the spilled files.
3839 * Additionally, whenever we partially stream the transaction we need to
3840 * maintain the last streamed LSN, and next time we need to restore from that
3841 * segment and the offset in WAL. As we stream the changes from the top
3842 * transaction and restore them subtransaction-wise, we would even need to
3843 * remember the subxact from where we streamed the last change.
3844 */
3845static ReorderBufferTXN *
3847{
3848 dlist_iter iter;
3849 Size largest_size = 0;
3851
3852 /* Find the largest top-level transaction having a base snapshot. */
3853 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3854 {
3855 ReorderBufferTXN *txn;
3856
3857 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3858
3859 /* must not be a subtxn */
3861 /* base_snapshot must be set */
3862 Assert(txn->base_snapshot != NULL);
3863
3864 /* Don't consider these kinds of transactions for eviction. */
3865 if (rbtxn_has_partial_change(txn) ||
3867 rbtxn_is_aborted(txn))
3868 continue;
3869
3870 /* Find the largest of the eviction candidates. */
3871 if ((largest == NULL || txn->total_size > largest_size) &&
3872 (txn->total_size > 0))
3873 {
3874 largest = txn;
3875 largest_size = txn->total_size;
3876 }
3877 }
3878
3879 return largest;
3880}
3881
3882/*
3883 * Check whether the logical_decoding_work_mem limit was reached, and if so
3884 * pick the largest (sub)transaction, one at a time, to evict: spill its changes
3885 * to disk or send them to the output plugin until we are under the memory limit.
3886 *
3887 * If debug_logical_replication_streaming is set to "immediate", stream or
3888 * serialize the changes immediately.
3889 *
3890 * XXX At this point we select transactions until we are under the memory
3891 * limit, but we might also adopt a more elaborate eviction strategy - for example
3892 * evicting enough transactions to free a certain fraction (e.g. 50%) of the memory
3893 * limit.
3894 */
3895static void
3897{
3898 ReorderBufferTXN *txn;
3899 bool update_stats = true;
3900
3901 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3902 {
3903 /*
3904 * Update the statistics as the memory usage has reached the limit. We
3905 * report the statistics update later in this function since we can
3906 * update the slot statistics all together while streaming or
3907 * serializing transactions in most cases.
3908 */
3909 rb->memExceededCount += 1;
3910 }
3912 {
3913 /*
3914 * Bail out if debug_logical_replication_streaming is buffered and we
3915 * haven't exceeded the memory limit.
3916 */
3917 return;
3918 }
3919
3920 /*
3921 * If debug_logical_replication_streaming is immediate, loop until there's
3922 * no change. Otherwise, loop until we reach under the memory limit. One
3923 * might think that just by evicting the largest (sub)transaction we will
3924 * come under the memory limit based on assumption that the selected
3925 * transaction is at least as large as the most recent change (which
3926 * caused us to go over the memory limit). However, that is not true
3927 * because a user can reduce the logical_decoding_work_mem to a smaller
3928 * value before the most recent change.
3929 */
3930 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3932 rb->size > 0))
3933 {
3934 /*
3935 * Pick the largest non-aborted transaction and evict it from memory
3936 * by streaming, if possible. Otherwise, spill to disk.
3937 */
3940 {
3941 /* we know there has to be one, because the size is not zero */
3942 Assert(txn && rbtxn_is_toptxn(txn));
3943 Assert(txn->total_size > 0);
3944 Assert(rb->size >= txn->total_size);
3945
3946 /* skip the transaction if aborted */
3948 continue;
3949
3951 }
3952 else
3953 {
3954 /*
3955 * Pick the largest transaction (or subtransaction) and evict it
3956 * from memory by serializing it to disk.
3957 */
3959
3960 /* we know there has to be one, because the size is not zero */
3961 Assert(txn);
3962 Assert(txn->size > 0);
3963 Assert(rb->size >= txn->size);
3964
3965 /* skip the transaction if aborted */
3967 continue;
3968
3970 }
3971
3972 /*
3973 * After eviction, the transaction should have no entries in memory,
3974 * and should use 0 bytes for changes.
3975 */
3976 Assert(txn->size == 0);
3977 Assert(txn->nentries_mem == 0);
3978
3979 /*
3980 * We've reported the memExceededCount update while streaming or
3981 * serializing the transaction.
3982 */
3983 update_stats = false;
3984 }
3985
3986 if (update_stats)
3988
3989 /* We must be under the memory limit now. */
3990 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3991}
3992
3993/*
3994 * Spill data of a large transaction (and its subtransactions) to disk.
3995 */
3996static void
3998{
4001 int fd = -1;
4003 Size spilled = 0;
4004 Size size = txn->size;
4005
4006 elog(DEBUG2, "spill %u changes in XID %u to disk",
4007 (uint32) txn->nentries_mem, txn->xid);
4008
4009 /* do the same to all child TXs */
4011 {
4013
4016 }
4017
4018 /* serialize changestream */
4020 {
4021 ReorderBufferChange *change;
4022
4023 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4024
4025 /*
4026 * store in the segment to which it belongs by start lsn; don't split over
4027 * multiple segments, though
4028 */
4029 if (fd == -1 ||
4031 {
4032 char path[MAXPGPATH];
4033
4034 if (fd != -1)
4036
4038
4039 /*
4040 * No need to care about TLIs here, only used during a single run,
4041 * so each LSN only maps to a specific WAL record.
4042 */
4044 curOpenSegNo);
4045
4046 /* open segment, create it if necessary */
4047 fd = OpenTransientFile(path,
4049
4050 if (fd < 0)
4051 ereport(ERROR,
4053 errmsg("could not open file \"%s\": %m", path)));
4054 }
4055
4056 ReorderBufferSerializeChange(rb, txn, fd, change);
4057 dlist_delete(&change->node);
4058 ReorderBufferFreeChange(rb, change, false);
4059
4060 spilled++;
4061 }
4062
4063 /* Update the memory counter */
4064 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4065
4066 /* update the statistics iff we have spilled anything */
4067 if (spilled)
4068 {
4069 rb->spillCount += 1;
4070 rb->spillBytes += size;
4071
4072 /* don't consider already serialized transactions */
4073 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4074
4075 /* update the decoding stats */
4077 }
4078
4079 Assert(spilled == txn->nentries_mem);
4081 txn->nentries_mem = 0;
4083
4084 if (fd != -1)
4086}
4087
4088/*
4089 * Serialize individual change to disk.
4090 */
4091static void
4093 int fd, ReorderBufferChange *change)
4094{
4097
4099
4100 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4101 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4102
4103 switch (change->action)
4104 {
4105 /* fall through these, they're all similar enough */
4110 {
4111 char *data;
4113 newtup;
4114 Size oldlen = 0;
4115 Size newlen = 0;
4116
4117 oldtup = change->data.tp.oldtuple;
4118 newtup = change->data.tp.newtuple;
4119
4120 if (oldtup)
4121 {
4122 sz += sizeof(HeapTupleData);
4123 oldlen = oldtup->t_len;
4124 sz += oldlen;
4125 }
4126
4127 if (newtup)
4128 {
4129 sz += sizeof(HeapTupleData);
4130 newlen = newtup->t_len;
4131 sz += newlen;
4132 }
4133
4134 /* make sure we have enough space */
4136
4137 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4138 /* might have been reallocated above */
4139 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4140
4141 if (oldlen)
4142 {
4143 memcpy(data, oldtup, sizeof(HeapTupleData));
4144 data += sizeof(HeapTupleData);
4145
4146 memcpy(data, oldtup->t_data, oldlen);
4147 data += oldlen;
4148 }
4149
4150 if (newlen)
4151 {
4152 memcpy(data, newtup, sizeof(HeapTupleData));
4153 data += sizeof(HeapTupleData);
4154
4155 memcpy(data, newtup->t_data, newlen);
4156 data += newlen;
4157 }
4158 break;
4159 }
4161 {
4162 char *data;
4163 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4164
4165 sz += prefix_size + change->data.msg.message_size +
4166 sizeof(Size) + sizeof(Size);
4168
4169 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4170
4171 /* might have been reallocated above */
4172 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4173
4174 /* write the prefix including the size */
4175 memcpy(data, &prefix_size, sizeof(Size));
4176 data += sizeof(Size);
4177 memcpy(data, change->data.msg.prefix,
4178 prefix_size);
4179 data += prefix_size;
4180
4181 /* write the message including the size */
4182 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4183 data += sizeof(Size);
4184 memcpy(data, change->data.msg.message,
4185 change->data.msg.message_size);
4186 data += change->data.msg.message_size;
4187
4188 break;
4189 }
4191 {
4192 char *data;
4194 change->data.inval.ninvalidations;
4195
4196 sz += inval_size;
4197
4199 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4200
4201 /* might have been reallocated above */
4202 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4204 data += inval_size;
4205
4206 break;
4207 }
4209 {
4210 Snapshot snap;
4211 char *data;
4212
4213 snap = change->data.snapshot;
4214
4215 sz += sizeof(SnapshotData) +
4216 sizeof(TransactionId) * snap->xcnt +
4217 sizeof(TransactionId) * snap->subxcnt;
4218
4219 /* make sure we have enough space */
4221 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4222 /* might have been reallocated above */
4223 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4224
4225 memcpy(data, snap, sizeof(SnapshotData));
4226 data += sizeof(SnapshotData);
4227
4228 if (snap->xcnt)
4229 {
4230 memcpy(data, snap->xip,
4231 sizeof(TransactionId) * snap->xcnt);
4232 data += sizeof(TransactionId) * snap->xcnt;
4233 }
4234
4235 if (snap->subxcnt)
4236 {
4237 memcpy(data, snap->subxip,
4238 sizeof(TransactionId) * snap->subxcnt);
4239 data += sizeof(TransactionId) * snap->subxcnt;
4240 }
4241 break;
4242 }
4244 {
4245 Size size;
4246 char *data;
4247
4248 /* account for the OIDs of truncated relations */
4249 size = sizeof(Oid) * change->data.truncate.nrelids;
4250 sz += size;
4251
4252 /* make sure we have enough space */
4254
4255 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4256 /* might have been reallocated above */
4257 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4258
4259 memcpy(data, change->data.truncate.relids, size);
4260 data += size;
4261
4262 break;
4263 }
4268 /* ReorderBufferChange contains everything important */
4269 break;
4270 }
4271
4272 ondisk->size = sz;
4273
4274 errno = 0;
4276 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4277 {
4278 int save_errno = errno;
4279
4281
4282 /* if write didn't set errno, assume problem is no disk space */
4284 ereport(ERROR,
4286 errmsg("could not write to data file for XID %u: %m",
4287 txn->xid)));
4288 }
4290
4291 /*
4292 * Keep the transaction's final_lsn up to date with each change we send to
4293 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4294 * only do this on commit and abort records, but that doesn't work if a
4295 * system crash leaves a transaction without its abort record).
4296 *
4297 * Make sure not to move it backwards.
4298 */
4299 if (txn->final_lsn < change->lsn)
4300 txn->final_lsn = change->lsn;
4301
4302 Assert(ondisk->change.action == change->action);
4303}
4304
4305/* Returns true if the output plugin supports streaming, false otherwise. */
4306static inline bool
4308{
4309 LogicalDecodingContext *ctx = rb->private_data;
4310
4311 return ctx->streaming;
4312}
4313
4314/* Returns true if streaming can be started now, false otherwise. */
4315static inline bool
4317{
4318 LogicalDecodingContext *ctx = rb->private_data;
4319 SnapBuild *builder = ctx->snapshot_builder;
4320
4321 /* We can't start streaming unless a consistent state is reached. */
4323 return false;
4324
4325 /*
4326 * We can't start streaming immediately even if the streaming is enabled
4327 * because we previously decoded this transaction and now just are
4328 * restarting.
4329 */
4331 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4332 return true;
4333
4334 return false;
4335}
4336
4337/*
4338 * Send data of a large transaction (and its subtransactions) to the
4339 * output plugin, but using the stream API.
4340 */
4341static void
4343{
4344 Snapshot snapshot_now;
4345 CommandId command_id;
4346 Size stream_bytes;
4347 bool txn_is_streamed;
4348
4349 /* We can never reach here for a subtransaction. */
4350 Assert(rbtxn_is_toptxn(txn));
4351
4352 /*
4353 * We can't make any assumptions about base snapshot here, similar to what
4354 * ReorderBufferCommit() does. That relies on base_snapshot getting
4355 * transferred from subxact in ReorderBufferCommitChild(), but that was
4356 * not yet called as the transaction is in-progress.
4357 *
4358 * So just walk the subxacts and use the same logic here. But we only need
4359 * to do that once, when the transaction is streamed for the first time.
4360 * After that we need to reuse the snapshot from the previous run.
4361 *
4362 * Unlike DecodeCommit which adds xids of all the subtransactions in
4363 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4364 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4365 * allows the catalog changes made in subtransactions decoded till now to
4366 * be visible.
4367 */
4368 if (txn->snapshot_now == NULL)
4369 {
4371
4372 /* make sure this transaction is streamed for the first time */
4374
4375 /* at the beginning we should have invalid command ID */
4377
4379 {
4381
4384 }
4385
4386 /*
4387 * If this transaction has no snapshot, it didn't make any changes to
4388 * the database till now, so there's nothing to decode.
4389 */
4390 if (txn->base_snapshot == NULL)
4391 {
4392 Assert(txn->ninvalidations == 0);
4393 return;
4394 }
4395
4396 command_id = FirstCommandId;
4397 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4398 txn, command_id);
4399 }
4400 else
4401 {
4402 /* the transaction must have been already streamed */
4404
4405 /*
4406 * Nah, we already have snapshot from the previous streaming run. We
4407 * assume new subxacts can't move the LSN backwards, and so can't beat
4408 * the LSN condition in the previous branch (so no need to walk
4409 * through subxacts again). In fact, we must not do that as we may be
4410 * using snapshot half-way through the subxact.
4411 */
4412 command_id = txn->command_id;
4413
4414 /*
4415 * We can't use txn->snapshot_now directly because after the last
4416 * streaming run, we might have got some new sub-transactions. So we
4417 * need to add them to the snapshot.
4418 */
4419 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4420 txn, command_id);
4421
4422 /* Free the previously copied snapshot. */
4423 Assert(txn->snapshot_now->copied);
4425 txn->snapshot_now = NULL;
4426 }
4427
4428 /*
4429 * Remember this information to be used later to update stats. We can't
4430 * update the stats here as an error while processing the changes would
4431 * lead to the accumulation of stats even though we haven't streamed all
4432 * the changes.
4433 */
4435 stream_bytes = txn->total_size;
4436
4437 /* Process and send the changes to output plugin. */
4438 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4439 command_id, true);
4440
4441 rb->streamCount += 1;
4442 rb->streamBytes += stream_bytes;
4443
4444 /* Don't consider already streamed transaction. */
4445 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4446
4447 /* update the decoding stats */
4449
4451 Assert(txn->nentries == 0);
4452 Assert(txn->nentries_mem == 0);
4453}
4454
4455/*
4456 * Size of a change in memory.
4457 */
4458static Size
4460{
4461 Size sz = sizeof(ReorderBufferChange);
4462
4463 switch (change->action)
4464 {
4465 /* fall through these, they're all similar enough */
4470 {
4472 newtup;
4473 Size oldlen = 0;
4474 Size newlen = 0;
4475
4476 oldtup = change->data.tp.oldtuple;
4477 newtup = change->data.tp.newtuple;
4478
4479 if (oldtup)
4480 {
4481 sz += sizeof(HeapTupleData);
4482 oldlen = oldtup->t_len;
4483 sz += oldlen;
4484 }
4485
4486 if (newtup)
4487 {
4488 sz += sizeof(HeapTupleData);
4489 newlen = newtup->t_len;
4490 sz += newlen;
4491 }
4492
4493 break;
4494 }
4496 {
4497 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4498
4499 sz += prefix_size + change->data.msg.message_size +
4500 sizeof(Size) + sizeof(Size);
4501
4502 break;
4503 }
4505 {
4506 sz += sizeof(SharedInvalidationMessage) *
4507 change->data.inval.ninvalidations;
4508 break;
4509 }
4511 {
4512 Snapshot snap;
4513
4514 snap = change->data.snapshot;
4515
4516 sz += sizeof(SnapshotData) +
4517 sizeof(TransactionId) * snap->xcnt +
4518 sizeof(TransactionId) * snap->subxcnt;
4519
4520 break;
4521 }
4523 {
4524 sz += sizeof(Oid) * change->data.truncate.nrelids;
4525
4526 break;
4527 }
4532 /* ReorderBufferChange contains everything important */
4533 break;
4534 }
4535
4536 return sz;
4537}
4538
4539
4540/*
4541 * Restore a number of changes spilled to disk back into memory.
4542 */
4543static Size
4545 TXNEntryFile *file, XLogSegNo *segno)
4546{
4547 Size restored = 0;
4550 File *fd = &file->vfd;
4551
4554
4555 /* free current entries, so we have memory for more */
4557 {
4560
4561 dlist_delete(&cleanup->node);
4563 }
4564 txn->nentries_mem = 0;
4566
4568
4569 while (restored < max_changes_in_memory && *segno <= last_segno)
4570 {
4571 int readBytes;
4573
4575
4576 if (*fd == -1)
4577 {
4578 char path[MAXPGPATH];
4579
4580 /* first time in */
4581 if (*segno == 0)
4582 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4583
4584 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4585
4586 /*
4587 * No need to care about TLIs here, only used during a single run,
4588 * so each LSN only maps to a specific WAL record.
4589 */
4591 *segno);
4592
4594
4595 /* No harm in resetting the offset even in case of failure */
4596 file->curOffset = 0;
4597
4598 if (*fd < 0 && errno == ENOENT)
4599 {
4600 *fd = -1;
4601 (*segno)++;
4602 continue;
4603 }
4604 else if (*fd < 0)
4605 ereport(ERROR,
4607 errmsg("could not open file \"%s\": %m",
4608 path)));
4609 }
4610
4611 /*
4612 * Read the statically sized part of a change which has information
4613 * about the total size. If we couldn't read a record, we're at the
4614 * end of this file.
4615 */
4617 readBytes = FileRead(file->vfd, rb->outbuf,
4620
4621 /* eof */
4622 if (readBytes == 0)
4623 {
4624 FileClose(*fd);
4625 *fd = -1;
4626 (*segno)++;
4627 continue;
4628 }
4629 else if (readBytes < 0)
4630 ereport(ERROR,
4632 errmsg("could not read from reorderbuffer spill file: %m")));
4633 else if (readBytes != sizeof(ReorderBufferDiskChange))
4634 ereport(ERROR,
4636 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4637 readBytes,
4638 (uint32) sizeof(ReorderBufferDiskChange))));
4639
4640 file->curOffset += readBytes;
4641
4642 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4643
4645 sizeof(ReorderBufferDiskChange) + ondisk->size);
4646 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4647
4648 readBytes = FileRead(file->vfd,
4649 rb->outbuf + sizeof(ReorderBufferDiskChange),
4650 ondisk->size - sizeof(ReorderBufferDiskChange),
4651 file->curOffset,
4653
4654 if (readBytes < 0)
4655 ereport(ERROR,
4657 errmsg("could not read from reorderbuffer spill file: %m")));
4658 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4659 ereport(ERROR,
4661 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4662 readBytes,
4663 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4664
4665 file->curOffset += readBytes;
4666
4667 /*
4668 * ok, read a full change from disk, now restore it into proper
4669 * in-memory format
4670 */
4671 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4672 restored++;
4673 }
4674
4675 return restored;
4676}
4677
4678/*
4679 * Convert change from its on-disk format to in-memory format and queue it onto
4680 * the TXN's ->changes list.
4681 *
4682 * Note: although "data" is declared char*, at entry it points to a
4683 * maxalign'd buffer, making it safe in most of this function to assume
4684 * that the pointed-to data is suitably aligned for direct access.
4685 */
4686static void
4688 char *data)
4689{
4691 ReorderBufferChange *change;
4692
4693 ondisk = (ReorderBufferDiskChange *) data;
4694
4695 change = ReorderBufferAllocChange(rb);
4696
4697 /* copy static part */
4698 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4699
4700 data += sizeof(ReorderBufferDiskChange);
4701
4702 /* restore individual stuff */
4703 switch (change->action)
4704 {
4705 /* fall through these, they're all similar enough */
4710 if (change->data.tp.oldtuple)
4711 {
4712 uint32 tuplelen = ((HeapTuple) data)->t_len;
4713
4714 change->data.tp.oldtuple =
4716
4717 /* restore ->tuple */
4718 memcpy(change->data.tp.oldtuple, data,
4719 sizeof(HeapTupleData));
4720 data += sizeof(HeapTupleData);
4721
4722 /* reset t_data pointer into the new tuplebuf */
4723 change->data.tp.oldtuple->t_data =
4724 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4725
4726 /* restore tuple data itself */
4728 data += tuplelen;
4729 }
4730
4731 if (change->data.tp.newtuple)
4732 {
4733 /* here, data might not be suitably aligned! */
4735
4737 sizeof(uint32));
4738
4739 change->data.tp.newtuple =
4741
4742 /* restore ->tuple */
4743 memcpy(change->data.tp.newtuple, data,
4744 sizeof(HeapTupleData));
4745 data += sizeof(HeapTupleData);
4746
4747 /* reset t_data pointer into the new tuplebuf */
4748 change->data.tp.newtuple->t_data =
4749 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4750
4751 /* restore tuple data itself */
4753 data += tuplelen;
4754 }
4755
4756 break;
4758 {
4759 Size prefix_size;
4760
4761 /* read prefix */
4762 memcpy(&prefix_size, data, sizeof(Size));
4763 data += sizeof(Size);
4764 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4765 prefix_size);
4766 memcpy(change->data.msg.prefix, data, prefix_size);
4767 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4768 data += prefix_size;
4769
4770 /* read the message */
4771 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4772 data += sizeof(Size);
4773 change->data.msg.message = MemoryContextAlloc(rb->context,
4774 change->data.msg.message_size);
4775 memcpy(change->data.msg.message, data,
4776 change->data.msg.message_size);
4777 data += change->data.msg.message_size;
4778
4779 break;
4780 }
4782 {
4784 change->data.inval.ninvalidations;
4785
4786 change->data.inval.invalidations =
4787 MemoryContextAlloc(rb->context, inval_size);
4788
4789 /* read the message */
4791
4792 break;
4793 }
4795 {
4798 Size size;
4799
4800 oldsnap = (Snapshot) data;
4801
4802 size = sizeof(SnapshotData) +
4803 sizeof(TransactionId) * oldsnap->xcnt +
4804 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4805
4806 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4807
4808 newsnap = change->data.snapshot;
4809
4810 memcpy(newsnap, data, size);
4811 newsnap->xip = (TransactionId *)
4812 (((char *) newsnap) + sizeof(SnapshotData));
4813 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4814 newsnap->copied = true;
4815 break;
4816 }
4817 /* the base struct contains all the data, easy peasy */
4819 {
4820 Oid *relids;
4821
4822 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4823 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4824 change->data.truncate.relids = relids;
4825
4826 break;
4827 }
4832 break;
4833 }
4834
4835 dlist_push_tail(&txn->changes, &change->node);
4836 txn->nentries_mem++;
4837
4838 /*
4839 * Update memory accounting for the restored change. We need to do this
4840 * although we don't check the memory limit when restoring the changes in
4841 * this branch (we only do that when initially queueing the changes after
4842 * decoding), because we will release the changes later, and that will
4843 * update the accounting too (subtracting the size from the counters). And
4844 * we don't want to underflow there.
4845 */
4847 ReorderBufferChangeSize(change));
4848}
4849
4850/*
4851 * Remove all on-disk data stored for the passed-in transaction.
4852 */
4853static void
4855{
4856 XLogSegNo first;
4857 XLogSegNo cur;
4858 XLogSegNo last;
4859
4862
4865
4866 /* iterate over all possible filenames, and delete them */
4867 for (cur = first; cur <= last; cur++)
4868 {
4869 char path[MAXPGPATH];
4870
4872 if (unlink(path) != 0 && errno != ENOENT)
4873 ereport(ERROR,
4875 errmsg("could not remove file \"%s\": %m", path)));
4876 }
4877}
4878
4879/*
4880 * Remove any leftover serialized reorder buffers from a slot directory after a
4881 * prior crash or decoding session exit.
4882 */
4883static void
4884ReorderBufferCleanupSerializedTXNs(const char *slotname)
4885{
4886 DIR *spill_dir;
4887 struct dirent *spill_de;
4888 struct stat statbuf;
4889 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4890
4891 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4892
4893 /* we're only handling directories here, skip if it's not ours */
4894 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4895 return;
4896
4897 spill_dir = AllocateDir(path);
4898 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4899 {
4900 /* only look at names that can be ours */
4901 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4902 {
4903 snprintf(path, sizeof(path),
4904 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4905 spill_de->d_name);
4906
4907 if (unlink(path) != 0)
4908 ereport(ERROR,
4910 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4911 path, PG_REPLSLOT_DIR, slotname)));
4912 }
4913 }
4915}
4916
4917/*
4918 * Given a replication slot, transaction ID and segment number, write the path
4919 * of the corresponding spill file into 'path', which is a caller-owned buffer
4920 * of size at least MAXPGPATH.
4921 */
4922static void
4924 XLogSegNo segno)
4925{
4927
4929
4930 snprintf(path, MAXPGPATH, "%s/%s/xid-%u-lsn-%X-%X.spill",
4933 xid, LSN_FORMAT_ARGS(recptr));
4934}
4935
4936/*
4937 * Delete all data spilled to disk after we've restarted/crashed. It will be
4938 * recreated when the respective slots are reused.
4939 */
4940void
4942{
4944 struct dirent *logical_de;
4945
4948 {
4949 if (strcmp(logical_de->d_name, ".") == 0 ||
4950 strcmp(logical_de->d_name, "..") == 0)
4951 continue;
4952
4953 /* if it cannot be a slot, skip the directory */
4954 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4955 continue;
4956
4957 /*
4958 * ok, has to be a surviving logical slot, iterate and delete
4959 * everything starting with xid-*
4960 */
4962 }
4964}
4965
4966/* ---------------------------------------
4967 * toast reassembly support
4968 * ---------------------------------------
4969 */
4970
4971/*
4972 * Initialize per tuple toast reconstruction support.
 *
 * Creates txn->toast_hash under the name "ReorderBufferToastHash" with an
 * initial size hint of 5.  Keys are Oid-sized and entries are
 * ReorderBufferToastEnt structs; hash_ctl.hcxt is pointed at rb->context.
 *
 * Must only be called while txn->toast_hash is still NULL (asserted).
 *
 * NOTE(review): the parameter list, the declaration of hash_ctl and the
 * flags argument of hash_create() are on lines omitted from this listing.
4973 */
4974static void
4976{
4978
 /* a second initialization would leak the existing table */
4979 Assert(txn->toast_hash == NULL);
4980
4981 hash_ctl.keysize = sizeof(Oid);
4982 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4983 hash_ctl.hcxt = rb->context;
4984 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4986}
4987
4988/*
4989 * Per toast-chunk handling for toast reconstruction
4990 *
4991 * Appends a toast chunk so we can reconstruct it when the tuple "owning" the
4992 * toasted Datum comes along.
 *
 * 'relation' must be a toast relation (asserted) and the chunk tuple is taken
 * from change->data.tp.newtuple.  Attribute 1 of that tuple is read as the
 * chunk_id (Oid), attribute 2 as the chunk sequence number (int32) and
 * attribute 3 as the chunk data; none of them may be NULL (asserted).
 *
 * Chunks have to arrive in order: the first chunk seen for a chunk_id must
 * carry sequence number 0 and every later one must be exactly
 * last_chunk_seq + 1, otherwise an ERROR is raised.
 *
 * The change itself (not a copy) is linked onto the tail of the hash entry's
 * chunk list via change->node, and the entry's size, last_chunk_seq and
 * num_chunks are updated.
 *
 * NOTE(review): the leading part of the parameter list, several local
 * declarations, the call made when txn->toast_hash is NULL and the
 * statements computing 'chunksize' are on lines omitted from this listing.
4993 */
4994static void
4996 Relation relation, ReorderBufferChange *change)
4997{
5000 bool found;
5002 bool isnull;
5003 Pointer chunk;
5004 TupleDesc desc = RelationGetDescr(relation);
5005 Oid chunk_id;
5007
 /* the toast hash is set up lazily, on the first chunk of a transaction */
5008 if (txn->toast_hash == NULL)
5010
5011 Assert(IsToastRelation(relation));
5012
5013 newtup = change->data.tp.newtuple;
5014 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5015 Assert(!isnull);
5016 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5017 Assert(!isnull);
5018
 /* find the entry for this chunk_id, creating it if it does not exist */
5020 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5021
5022 if (!found)
5023 {
 /* brand new entry: the hash code has already stored the key */
5024 Assert(ent->chunk_id == chunk_id);
5025 ent->num_chunks = 0;
5026 ent->last_chunk_seq = 0;
5027 ent->size = 0;
5028 ent->reconstructed = NULL;
5029 dlist_init(&ent->chunks);
5030
 /* the first chunk of a value has to be sequence number 0 */
5031 if (chunk_seq != 0)
5032 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5033 chunk_seq, chunk_id);
5034 }
 /* 'found' is necessarily true in this branch; the test is redundant */
5035 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5036 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5037 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5038
5039 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5040 Assert(!isnull);
5041
5042 /* calculate size so we can allocate the right size at once later */
5045 else if (VARATT_IS_SHORT(chunk))
5046 /* could happen due to heap_form_tuple doing its thing */
5048 else
5049 elog(ERROR, "unexpected type of toast chunk");
5050
 /* account for the chunk and queue the change on the entry's list */
5051 ent->size += chunksize;
5052 ent->last_chunk_seq = chunk_seq;
5053 ent->num_chunks++;
5054 dlist_push_tail(&ent->chunks, &change->node);
5055}
5056
5057/*
5058 * Rejigger change->newtuple to point to in-memory toast tuples instead of
5059 * on-disk toast tuples that may no longer exist (think DROP TABLE or VACUUM).
5060 *
5061 * We cannot replace unchanged toast tuples though, so those will still point
5062 * to on-disk toast data.
5063 *
5064 * While updating the existing change with detoasted tuple data, we need to
5065 * update the memory accounting info, because the change size will differ.
5066 * Otherwise the accounting may get out of sync, triggering serialization
5067 * at unexpected times.
5068 *
5069 * We record the size of the change before rejiggering the tuple and, once
5070 * the tuple has been rebuilt, subtract that old size and add the new size.
5071 * This makes it look like the change was removed and then added back,
 * except it only tweaks the accounting info.
5072 *
5073 * In particular it can't trigger serialization, which would be pointless
5074 * anyway as it happens during commit processing right before handing
5075 * the change to the output plugin.
 *
 * Outline: the new tuple (change->data.tp.newtuple) is deformed into
 * attrs[]/isnull[]; for every non-dropped, non-NULL varlena attribute that
 * is stored externally and has an entry in txn->toast_hash, the chunks
 * queued on that entry are concatenated into a freshly allocated varlena
 * and the attribute is redirected to it; finally a tuple is re-formed and
 * copied back over newtup->t_data.  Nothing is done when txn->toast_hash is
 * NULL.  Allocations are made with rb->context as the current context.
 *
 * NOTE(review): a number of declarations and statements (e.g. the start of
 * the parameter list, the external-datum test, the hash lookup call and the
 * accounting calls at the end) are on lines omitted from this listing.
5076 */
5077static void
5079 Relation relation, ReorderBufferChange *change)
5080{
5081 TupleDesc desc;
5082 int natt;
5083 Datum *attrs;
5084 bool *isnull;
 /*
  * Per-attribute flags, set for attributes whose replacement datum was
  * allocated here.  Note the name hides any file-scope declaration called
  * "free" for the remainder of this function.
  */
5085 bool *free;
5087 Relation toast_rel;
5089 MemoryContext oldcontext;
5091 Size old_size;
5092
5093 /* no toast tuples changed */
5094 if (txn->toast_hash == NULL)
5095 return;
5096
5097 /*
5098 * We're going to modify the size of the change. So, to make sure the
5099 * accounting is correct we record the current change size and then after
5100 * re-computing the change we'll subtract the recorded size and then
5101 * re-add the new change size at the end. We don't immediately subtract
5102 * the old size because if there is any error before we add the new size,
5103 * we will release the changes and that will update the accounting info
5104 * (subtracting the size from the counters). And we don't want to
5105 * underflow there.
5106 */
5108
5109 oldcontext = MemoryContextSwitchTo(rb->context);
5110
5111 /* we should only have toast tuples in an INSERT or UPDATE */
5112 Assert(change->data.tp.newtuple);
5113
5114 desc = RelationGetDescr(relation);
5115
 /* open the table's toast relation; it is closed again further down */
5116 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5117 if (!RelationIsValid(toast_rel))
5118 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5119 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5120
5121 toast_desc = RelationGetDescr(toast_rel);
5122
5123 /* should we allocate from stack instead? */
5124 attrs = palloc0_array(Datum, desc->natts);
5125 isnull = palloc0_array(bool, desc->natts);
5126 free = palloc0_array(bool, desc->natts);
5127
5128 newtup = change->data.tp.newtuple;
5129
5130 heap_deform_tuple(newtup, desc, attrs, isnull);
5131
5132 for (natt = 0; natt < desc->natts; natt++)
5133 {
5137
5138 /* va_rawsize is the size of the original datum -- including header */
5139 varatt_external toast_pointer;
5142 varlena *reconstructed;
5143 dlist_iter it;
 /* number of payload bytes copied into 'reconstructed' so far */
5144 Size data_done = 0;
5145
5146 if (attr->attisdropped)
5147 continue;
5148
5149 /* not a varlena datatype */
5150 if (attr->attlen != -1)
5151 continue;
5152
5153 /* no data */
5154 if (isnull[natt])
5155 continue;
5156
5157 /* ok, we know we have a toast datum */
5159
5160 /* no need to do anything if the tuple isn't external */
5162 continue;
5163
5165
5166 /*
5167 * Check whether the toast tuple changed, replace if so.
5168 */
5171 &toast_pointer.va_valueid,
5172 HASH_FIND,
5173 NULL);
 /* no chunks were collected for this value: keep the on-disk pointer */
5174 if (ent == NULL)
5175 continue;
5176
5177 new_datum =
5179
5180 free[natt] = true;
5181
 /* zeroed buffer sized for the whole original datum, header included */
5182 reconstructed = palloc0(toast_pointer.va_rawsize);
5183
 /* remembered on the entry; released in ReorderBufferToastReset() */
5184 ent->reconstructed = reconstructed;
5185
5186 /* stitch toast tuple back together from its parts */
5187 dlist_foreach(it, &ent->chunks)
5188 {
5189 bool cisnull;
5192 Pointer chunk;
5193
5195 ctup = cchange->data.tp.newtuple;
5197
5198 Assert(!cisnull);
5201
 /* append this chunk's payload right after what has been copied */
5202 memcpy(VARDATA(reconstructed) + data_done,
5203 VARDATA(chunk),
5206 }
 /* all chunks together must add up to the external size of the value */
5207 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5208
5209 /* make sure it's marked as compressed or not */
5210 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5211 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5212 else
5213 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5214
5216 redirect_pointer.pointer = reconstructed;
5217
5220 sizeof(redirect_pointer));
5221
5223 }
5224
5225 /*
5226 * Build tuple in separate memory & copy tuple back into the tuplebuf
5227 * passed to the output plugin. We can't directly heap_fill_tuple() into
5228 * the tuplebuf because attrs[] will point back into the current content.
5229 */
5230 tmphtup = heap_form_tuple(desc, attrs, isnull);
 /*
  * These assertions look at newtup as it was before the copy below, i.e.
  * they check the old length and that the tuple data directly follows the
  * HeapTupleData header.
  * NOTE(review): presumably the tuple buffer is large enough to hold the
  * re-formed tuple as well -- not verifiable from this listing.
  */
5231 Assert(newtup->t_len <= MaxHeapTupleSize);
5232 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5233
5234 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5235 newtup->t_len = tmphtup->t_len;
5236
5237 /*
5238 * free resources we won't further need, more persistent stuff will be
5239 * free'd in ReorderBufferToastReset().
5240 */
5241 RelationClose(toast_rel);
5242 pfree(tmphtup);
5243 for (natt = 0; natt < desc->natts; natt++)
5244 {
5245 if (free[natt])
5247 }
5248 pfree(attrs);
5249 pfree(free);
5250 pfree(isnull);
5251
5252 MemoryContextSwitchTo(oldcontext);
5253
5254 /* subtract the old change size */
5256 /* now add the change back, with the correct size */
5258 ReorderBufferChangeSize(change));
5259}
5260
5261/*
5262 * Free all resources allocated for toast reconstruction.
 *
 * Does nothing when txn->toast_hash is NULL.  Otherwise, for every entry in
 * the hash: the reconstructed datum (if one was built) is pfree'd, and each
 * change queued on the entry's chunk list is unlinked and handed to
 * ReorderBufferFreeChange() with upd_mem = true.  Afterwards
 * txn->toast_hash is reset to NULL so the hash can be set up again later.
 *
 * NOTE(review): the parameter list, the local declarations, the hash
 * iteration statements and the statement preceding the final assignment
 * (presumably destroying the hash table) are on lines omitted from this
 * listing.
5263 */
5264static void
5266{
5269
5270 if (txn->toast_hash == NULL)
5271 return;
5272
5273 /* sequentially walk over the hash and free everything */
5276 {
5278
5279 if (ent->reconstructed != NULL)
5280 pfree(ent->reconstructed);
5281
 /* the _modify iterator is needed because entries are deleted in the loop */
5282 dlist_foreach_modify(it, &ent->chunks)
5283 {
5284 ReorderBufferChange *change =
5286
5287 dlist_delete(&change->node);
5288 ReorderBufferFreeChange(rb, change, true);
5289 }
5290 }
5291
5293 txn->toast_hash = NULL;
5294}
5295
5296
5297/* ---------------------------------------
5298 * Visibility support for logical decoding
5299 *
5300 *
5301 * Lookup actual cmin/cmax values when using decoding snapshot. We can't
5302 * always rely on stored cmin/cmax values because of two scenarios:
5303 *
5304 * * A tuple got changed multiple times during a single transaction and thus
5305 * has got a combo CID. Combo CIDs are only valid for the duration of a
5306 * single transaction.
5307 * * A tuple with a cmin but no cmax (and thus no combo CID) got
5308 * deleted/updated in another transaction than the one which created it
5309 * which we are looking at right now. As only one of cmin, cmax or combo CID
5310 * is actually stored in the heap we don't have access to the value we
5311 * need anymore.
5312 *
5313 * To resolve those problems we have a per-transaction hash of (cmin,
5314 * cmax) tuples keyed by (relfilelocator, ctid) which contains the actual
5315 * (cmin, cmax) values. That also takes care of combo CIDs by simply
5316 * not caring about them at all. As we have the real cmin/cmax values
5317 * combo CIDs aren't interesting.
5318 *
5319 * As we only care about catalog tuples here the overhead of this
5320 * hashtable should be acceptable.
5321 *
5322 * Heap rewrites complicate this a bit, check rewriteheap.c for
5323 * details.
5324 * -------------------------------------------------------------------------
5325 */
5326
5327/* struct for sorting mapping files by LSN efficiently */
/*
 * NOTE(review): the LSN member (referenced elsewhere in this file as
 * f->lsn / a->lsn) and the closing line of the typedef are on lines omitted
 * from this listing.
 */
5328typedef struct RewriteMappingFile
5329{
 /* name of the mapping file, as copied from the directory entry's d_name */
5331 char fname[MAXPGPATH];
5333
/*
 * Debugging aid, compiled only when NOT_USED is defined: emits one DEBUG3
 * log line per entry, showing the entry's relfilelocator (dbOid, spcOid,
 * relNumber), the tid's block number plus one further value, and the
 * stored cmin and cmax.
 *
 * NOTE(review): the function name/parameter line, the local declarations,
 * the iteration statements and one elog() argument are on lines omitted
 * from this listing.
 */
5334#ifdef NOT_USED
5335static void
5337{
5340
5343 {
5344 elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
5345 ent->key.rlocator.dbOid,
5346 ent->key.rlocator.spcOid,
5347 ent->key.rlocator.relNumber,
5348 ItemPointerGetBlockNumber(&ent->key.tid),
5350 ent->cmin,
5351 ent->cmax
5352 );
5353 }
5354}
5355#endif
5356
5357/*
5358 * Apply a single mapping file to tuplecid_data.
5359 *
5360 * The mapping file has to have been verified to be a) committed b) for our
5361 * transaction c) applied in LSN order.
 *
 * 'fname' is a file name relative to PG_LOGICAL_MAPPINGS_DIR.  The file is
 * read as a sequence of fixed-size LogicalRewriteMappingData records until
 * EOF.  For each record whose old location has an entry in tuplecid_data,
 * an entry for the new location is entered; if that entry is new, cmin,
 * cmax and combocid are copied over from the old entry.
 *
 * Raises ERROR if the file cannot be opened, read or closed, or if a read
 * returns a partial record.
 *
 * NOTE(review): the open call, some local declarations, the lookup of the
 * old-location entry and parts of the key setup are on lines omitted from
 * this listing.
5362 */
5363static void
5364ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
5365{
5366 char path[MAXPGPATH];
5367 int fd;
5368 int readBytes;
5370
5371 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5373 if (fd < 0)
5374 ereport(ERROR,
5376 errmsg("could not open file \"%s\": %m", path)));
5377
5378 while (true)
5379 {
5383 bool found;
5384
5385 /* be careful about padding */
5386 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5387
5388 /* read all mappings till the end of the file */
5390 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5392
 /* distinguish I/O error, clean EOF and a truncated trailing record */
5393 if (readBytes < 0)
5394 ereport(ERROR,
5396 errmsg("could not read file \"%s\": %m",
5397 path)));
5398 else if (readBytes == 0) /* EOF */
5399 break;
5400 else if (readBytes != sizeof(LogicalRewriteMappingData))
5401 ereport(ERROR,
5403 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5404 path, readBytes,
5405 (int32) sizeof(LogicalRewriteMappingData))));
5406
 /* first build the key for the tuple's old location */
5407 key.rlocator = map.old_locator;
5409 &key.tid);
5410
5411
5414
5415 /* no existing mapping, no need to update */
5416 if (!ent)
5417 continue;
5418
 /* then reuse the key for the tuple's new location */
5419 key.rlocator = map.new_locator;
5421 &key.tid);
5422
5424 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5425
5426 if (found)
5427 {
5428 /*
5429 * Make sure the existing mapping makes sense. We sometimes update
5430 * old records that did not yet have a cmax (e.g. pg_class' own
5431 * entry while rewriting it) during rewrites, so allow that.
5432 */
5433 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5434 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5435 }
5436 else
5437 {
5438 /* update mapping */
5439 new_ent->cmin = ent->cmin;
5440 new_ent->cmax = ent->cmax;
5441 new_ent->combocid = ent->combocid;
5442 }
5443 }
5444
5445 if (CloseTransientFile(fd) != 0)
5446 ereport(ERROR,
5448 errmsg("could not close file \"%s\": %m", path)));
5449}
5450
5451
5452/*
5453 * Check whether the TransactionId 'xid' is in the pre-sorted array 'xip'.
 *
 * 'num' is the number of elements in 'xip'.  The lookup is a bsearch() using
 * xidComparator, so 'xip' has to be sorted consistently with that
 * comparator.  Returns true if a matching element exists, false otherwise.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * omitted from this listing.
5454 */
5455static bool
5457{
5458 return bsearch(&xid, xip, num,
5459 sizeof(TransactionId), xidComparator) != NULL;
5460}
5461
5462/*
5463 * list_sort() comparator for sorting RewriteMappingFiles in LSN order.
 *
 * The result is that of pg_cmp_u64() applied to the two elements' lsn
 * fields, so sorting with this comparator orders by ascending LSN.
 *
 * NOTE(review): the parameter list and the declarations of 'a' and 'b'
 * (presumably extracted from the two list cells) are on lines omitted from
 * this listing.
5464 */
5465static int
5467{
5470
5471 return pg_cmp_u64(a->lsn, b->lsn);
5472}
5473
5474/*
5475 * Apply any existing logical remapping files if there are any targeted at our
5476 * transaction for relid.
 *
 * Directory entries whose names start with "map-" are parsed into database
 * OID, relation OID, LSN (two 32-bit halves), mapped xid and creating xid.
 * A file is kept only if it belongs to the right database (InvalidOid for
 * shared relations, MyDatabaseId otherwise), to 'relid', passes the check
 * on the creating transaction, and its mapped xid is contained in
 * snapshot->subxip.  The surviving files are sorted and then applied one
 * after the other, each list element being pfree'd once processed.
 *
 * Raises ERROR if a "map-" file name cannot be parsed.
 *
 * NOTE(review): the parameter list, the directory open/read/close
 * statements, the sscanf() call head, the creating-transaction test, the
 * allocation of 'f', the list_sort() call and the per-file apply call are
 * on lines omitted from this listing.
5477 */
5478static void
5480{
5482 struct dirent *mapping_de;
5483 List *files = NIL;
5484 ListCell *file;
 /* mapping files of shared relations are filed under InvalidOid */
5485 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5486
5489 {
5490 Oid f_dboid;
5491 Oid f_relid;
5495 uint32 f_hi,
5496 f_lo;
5498
5499 if (strcmp(mapping_de->d_name, ".") == 0 ||
5500 strcmp(mapping_de->d_name, "..") == 0)
5501 continue;
5502
5503 /* Ignore files that aren't ours */
5504 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5505 continue;
5506
 /* all six fields have to be parsed successfully */
5508 &f_dboid, &f_relid, &f_hi, &f_lo,
5509 &f_mapped_xid, &f_create_xid) != 6)
5510 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5511
 /* reassemble the 64-bit LSN from its high and low 32-bit halves */
5512 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5513
5514 /* mapping for another database */
5515 if (f_dboid != dboid)
5516 continue;
5517
5518 /* mapping for another relation */
5519 if (f_relid != relid)
5520 continue;
5521
5522 /* did the creating transaction abort? */
5524 continue;
5525
5526 /* not for our transaction */
5527 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5528 continue;
5529
5530 /* ok, relevant, queue for apply */
5532 f->lsn = f_lsn;
 /*
  * NOTE(review): unbounded copy into fname[MAXPGPATH]; presumably d_name
  * can never be that long -- confirm.
  */
5533 strcpy(f->fname, mapping_de->d_name);
5534 files = lappend(files, f);
5535 }
5537
5538 /* sort files so we apply them in LSN order */
5540
5541 foreach(file, files)
5542 {
5544
 /* only the first element of snapshot->subxip is reported in the log */
5545 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5546 snapshot->subxip[0]);
5548 pfree(f);
5549 }
5550}
5551
5552/*
5553 * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on
5554 * combo CIDs.
 *
 * The lookup key is built from the relfilelocator of 'buffer' (obtained via
 * BufferGetTag()) and the tuple's t_self.  'buffer' must not be a local
 * buffer and must be the main-fork block that htup->t_self refers to
 * (asserted).
 *
 * Returns true and stores the values through 'cmin' and/or 'cmax' (either
 * pointer may be NULL to skip that output) when an entry is found.  Returns
 * false when tuplecid_data is NULL or when no entry exists even after one
 * retry.
 *
 * NOTE(review): the function-name line, the declarations of 'key' and
 * 'ent', the hash lookup after the 'restart' label and the statement run
 * before the retry (presumably applying rewrite mappings) are on lines
 * omitted from this listing.
5555 */
5556bool
5558 Snapshot snapshot,
5559 HeapTuple htup, Buffer buffer,
5560 CommandId *cmin, CommandId *cmax)
5561{
5564 ForkNumber forkno;
5565 BlockNumber blockno;
 /* set once the single permitted retry has been used */
5566 bool updated_mapping = false;
5567
5568 /*
5569 * Return unresolved if tuplecid_data is not valid. That's because when
5570 * streaming in-progress transactions we may run into tuples with the CID
5571 * before actually decoding them. Think e.g. about INSERT followed by
5572 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5573 * INSERT. So in such cases, we assume the CID is from the future
5574 * command.
5575 */
5576 if (tuplecid_data == NULL)
5577 return false;
5578
5579 /* be careful about padding */
5580 memset(&key, 0, sizeof(key));
5581
5582 Assert(!BufferIsLocal(buffer));
5583
5584 /*
5585 * get relfilelocator from the buffer, no convenient way to access it
5586 * other than that.
5587 */
5588 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5589
5590 /* tuples can only be in the main fork */
5591 Assert(forkno == MAIN_FORKNUM);
5592 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5593
5594 ItemPointerCopy(&htup->t_self,
5595 &key.tid);
5596
5597restart:
5600
5601 /*
5602 * failed to find a mapping, check whether the table was rewritten and
5603 * apply mapping if so, but only do that once - there can be no new
5604 * mappings while we are in here since we have to hold a lock on the
5605 * relation.
5606 */
5607 if (ent == NULL && !updated_mapping)
5608 {
5610 /* now check but don't update for a mapping again */
5611 updated_mapping = true;
5612 goto restart;
5613 }
5614 else if (ent == NULL)
5615 return false;
5616
 /* both output pointers are optional */
5617 if (cmin)
5618 *cmin = ent->cmin;
5619 if (cmax)
5620 *cmax = ent->cmax;
5621 return true;
5622}
5623
5624/*
5625 * Count invalidation messages of specified transaction.
5626 *
5627 * Returns number of messages, and msgs is set to the pointer of the linked
5628 * list for the messages.
 *
 * More precisely, *msgs is set to txn->invalidations and the return value
 * is txn->ninvalidations.  The transaction is looked up by 'xid' without
 * creating it; if it is not known, 0 is returned and *msgs is left
 * untouched, so callers must not read *msgs unless the result is non-zero.
 *
 * NOTE(review): the lines carrying the function name and parameter list are
 * omitted from this listing.
5629 */
5630uint32
5633{
5634 ReorderBufferTXN *txn;
5635
 /* lookup only: the third argument (create) is false */
5636 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5637 false);
5638
5639 if (txn == NULL)
5640 return 0;
5641
5642 *msgs = txn->invalidations;
5643
5644 return txn->ninvalidations;
5645}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
uint32 BlockNumber
Definition block.h:31
static int32 next
Definition blutils.c:225
static void cleanup(void)
Definition bootstrap.c:838
int Buffer
Definition buf.h:23
#define BufferIsLocal(buffer)
Definition buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4378
#define NameStr(name)
Definition c.h:798
#define InvalidCommandId
Definition c.h:716
#define VARHDRSZ
Definition c.h:744
#define Assert(condition)
Definition c.h:906
#define PG_BINARY
Definition c.h:1330
#define FirstCommandId
Definition c.h:715
int32_t int32
Definition c.h:575
uint64_t uint64
Definition c.h:580
#define unlikely(x)
Definition c.h:424
uint32_t uint32
Definition c.h:579
#define pg_fallthrough
Definition c.h:144
void * Pointer
Definition c.h:570
uint32 CommandId
Definition c.h:713
uint32 TransactionId
Definition c.h:699
size_t Size
Definition c.h:652
bool IsToastRelation(Relation relation)
Definition catalog.c:206
bool IsSharedRelation(Oid relationId)
Definition catalog.c:304
int64 TimestampTz
Definition timestamp.h:39
#define INDIRECT_POINTER_SIZE
Definition detoast.h:34
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition detoast.h:22
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
struct cursor * cur
Definition ecpg.c:29
Datum arg
Definition elog.c:1322
void FreeErrorData(ErrorData *edata)
Definition elog.c:2013
int errcode_for_file_access(void)
Definition elog.c:897
ErrorData * CopyErrorData(void)
Definition elog.c:1941
void FlushErrorState(void)
Definition elog.c:2062
int errmsg(const char *fmt,...)
Definition elog.c:1093
#define PG_RE_THROW()
Definition elog.h:405
#define DEBUG3
Definition elog.h:28
#define PG_TRY(...)
Definition elog.h:372
#define DEBUG2
Definition elog.h:29
#define PG_END_TRY(...)
Definition elog.h:397
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define PG_CATCH(...)
Definition elog.h:382
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
int FreeDir(DIR *dir)
Definition fd.c:3008
int CloseTransientFile(int fd)
Definition fd.c:2854
struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel)
Definition fd.c:2971
void FileClose(File file)
Definition fd.c:1965
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1562
DIR * AllocateDir(const char *dirname)
Definition fd.c:2890
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2956
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2677
static ssize_t FileRead(File file, void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:225
int File
Definition fd.h:51
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition generation.c:162
Oid MyDatabaseId
Definition globals.c:94
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1117
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition heaptuple.c:1346
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
#define HASH_CONTEXT
Definition hsearch.h:102
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
#define HEAPTUPLESIZE
Definition htup.h:73
HeapTupleData * HeapTuple
Definition htup.h:71
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
#define SizeofHeapTupleHeader
#define MaxHeapTupleSize
static Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static bool dlist_has_next(const dlist_head *head, const dlist_node *node)
Definition ilist.h:503
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition ilist.h:709
static void dlist_insert_before(dlist_node *before, dlist_node *node)
Definition ilist.h:393
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static dlist_node * dlist_next_node(dlist_head *head, dlist_node *node)
Definition ilist.h:537
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
Definition inval.c:823
void InvalidateSystemCaches(void)
Definition inval.c:916
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_sort(List *list, list_sort_comparator cmp)
Definition list.c:1674
void UpdateDecodingStats(LogicalDecodingContext *ctx)
Definition logical.c:1943
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1266
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define SLAB_DEFAULT_BLOCK_SIZE
Definition memutils.h:189
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
void pairingheap_remove(pairingheap *heap, pairingheap_node *node)
void pairingheap_add(pairingheap *heap, pairingheap_node *node)
pairingheap * pairingheap_allocate(pairingheap_comparator compare, void *arg)
Definition pairingheap.c:42
pairingheap_node * pairingheap_first(pairingheap *heap)
#define pairingheap_container(type, membername, ptr)
Definition pairingheap.h:43
#define pairingheap_const_container(type, membername, ptr)
Definition pairingheap.h:51
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define MAXPGPATH
const void * data
#define lfirst(lc)
Definition pg_list.h:172
#define NIL
Definition pg_list.h:68
#define sprintf
Definition port.h:262
#define snprintf
Definition port.h:260
#define qsort(a, b, c, d)
Definition port.h:495
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212
#define InvalidOid
unsigned int Oid
static int fd(const char *x, int i)
static int fb(int x)
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1405
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsValid(relation)
Definition rel.h:489
Relation RelationIdGetRelation(Oid relationId)
Definition relcache.c:2094
void RelationClose(Relation relation)
Definition relcache.c:2215
Oid RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
ForkNumber
Definition relpath.h:56
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146
static int file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
void ReorderBufferFreeRelids(ReorderBuffer *rb, Oid *relids)
void ReorderBufferFreeChange(ReorderBuffer *rb, ReorderBufferChange *change, bool upd_mem)
static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static void ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, CommandId cid)
static void ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferReplay(ReorderBufferTXN *txn, ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
static void ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, uint32 *ninvals_out, SharedInvalidationMessage *msgs_new, Size nmsgs_new)
static ReorderBufferTXN * ReorderBufferLargestTXN(ReorderBuffer *rb)
void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, RelFileLocator locator, ItemPointerData tid, CommandId cmin, CommandId cmax, CommandId combocid)
void ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, TimestampTz abort_time)
static bool ReorderBufferCanStartStreaming(ReorderBuffer *rb)
static void ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id, XLogRecPtr last_lsn, ReorderBufferChange *specinsert)
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
void ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb)
static int ReorderBufferIterCompare(Datum a, Datum b, void *arg)
static void ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferIterTXNState *volatile *iter_state)
bool ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change)
void ReorderBufferFreeTupleBuf(HeapTuple tuple)
void ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, ReorderBufferChange *change, bool toast_insert)
void ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, char *gid)
uint32 ReorderBufferGetInvalidations(ReorderBuffer *rb, TransactionId xid, SharedInvalidationMessage **msgs)
void ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
void ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn)
TransactionId * ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb)
static void ReorderBufferSaveTXNSnapshot(ReorderBuffer *rb, ReorderBufferTXN *txn, Snapshot snapshot_now, CommandId command_id)
#define IsSpecInsert(action)
static Size ReorderBufferChangeSize(ReorderBufferChange *change)
int logical_decoding_work_mem
static void AssertChangeLsnOrder(ReorderBufferTXN *txn)
static bool ReorderBufferCanStream(ReorderBuffer *rb)
static int ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg)
static void ReorderBufferApplyChange(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change, bool streaming)
void ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid)
bool ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
void ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, XLogRecPtr two_phase_at, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit)
static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, int fd, ReorderBufferChange *change)
void ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
void ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, ReplOriginId origin_id, XLogRecPtr origin_lsn)
int debug_logical_replication_streaming
void ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
#define IsInsertOrUpdate(action)
static void ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
void ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, Snapshot snap, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message)
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
static void ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
static void ReorderBufferIterTXNFinish(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
void ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Snapshot snap)
static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared)
#define CHANGES_THRESHOLD
static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN(ReorderBuffer *rb)
static bool ReorderBufferCheckAndTruncateAbortedTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, char *data)
HeapTuple ReorderBufferAllocTupleBuf(ReorderBuffer *rb, Size tuple_len)
static void AssertTXNLsnOrder(ReorderBuffer *rb)
#define MAX_DISTR_INVAL_MSG_PER_TXN
static void ReorderBufferApplyMessage(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool streaming)
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap)
static void ReorderBufferCleanupSerializedTXNs(const char *slotname)
ReorderBufferChange * ReorderBufferAllocChange(ReorderBuffer *rb)
void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
static void SetupCheckXidLive(TransactionId xid)
static bool TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, ReorderBufferTXN *txn, CommandId cid)
static void ReorderBufferApplyTruncate(ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation *relations, ReorderBufferChange *change, bool streaming)
static void ReorderBufferProcessPartialChange(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *change, bool toast_insert)
static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static void ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs)
static ReorderBufferTXN * ReorderBufferAllocTXN(ReorderBuffer *rb)
static void ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations)
static void ReorderBufferTransferSnapToParent(ReorderBufferTXN *txn, ReorderBufferTXN *subtxn)
static void ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
static ReorderBufferChange * ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
Oid * ReorderBufferAllocRelids(ReorderBuffer *rb, int nrelids)
static void ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
static void ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb, ReorderBufferChange *change, ReorderBufferTXN *txn, bool addition, Size sz)
static void ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
void ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, TXNEntryFile *file, XLogSegNo *segno)
void ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid, TransactionId subxid, XLogRecPtr lsn)
void ReorderBufferFree(ReorderBuffer *rb)
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno)
#define IsSpecConfirmOrAbort(action)
static void ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
static const Size max_changes_in_memory
void StartupReorderBuffer(void)
void ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
static ReorderBufferTXN * ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create, bool *is_new, XLogRecPtr lsn, bool create_as_top)
static void ReorderBufferMaybeMarkTXNStreamed(ReorderBuffer *rb, ReorderBufferTXN *txn)
ReorderBufferTXN * ReorderBufferGetOldestTXN(ReorderBuffer *rb)
static void ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn, volatile Snapshot snapshot_now, volatile CommandId command_id, bool streaming)
#define rbtxn_is_committed(txn)
#define rbtxn_has_streamable_change(txn)
#define rbtxn_has_catalog_changes(txn)
@ DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE
@ DEBUG_LOGICAL_REP_STREAMING_BUFFERED
#define RBTXN_PREPARE_STATUS_MASK
#define rbtxn_is_serialized_clear(txn)
#define RBTXN_IS_STREAMED
#define rbtxn_is_prepared(txn)
#define RBTXN_HAS_PARTIAL_CHANGE
#define rbtxn_is_streamed(txn)
#define RBTXN_SENT_PREPARE
#define rbtxn_is_toptxn(txn)
#define rbtxn_get_toptxn(txn)
#define rbtxn_is_known_subxact(txn)
#define rbtxn_is_subtxn(txn)
#define RBTXN_HAS_CATALOG_CHANGES
#define RBTXN_IS_COMMITTED
#define PG_LOGICAL_MAPPINGS_DIR
#define RBTXN_DISTR_INVAL_OVERFLOWED
#define RBTXN_IS_SERIALIZED_CLEAR
#define rbtxn_sent_prepare(txn)
#define RBTXN_IS_PREPARED
#define rbtxn_distr_inval_overflowed(txn)
#define RBTXN_SKIPPED_PREPARE
#define RBTXN_HAS_STREAMABLE_CHANGE
@ REORDER_BUFFER_CHANGE_INVALIDATION
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
@ REORDER_BUFFER_CHANGE_MESSAGE
@ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT
@ REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID
@ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
@ REORDER_BUFFER_CHANGE_TRUNCATE
@ REORDER_BUFFER_CHANGE_DELETE
@ REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT
#define rbtxn_is_aborted(txn)
#define RBTXN_IS_SERIALIZED
#define rbtxn_is_serialized(txn)
#define RBTXN_IS_ABORTED
#define RBTXN_IS_SUBXACT
#define rbtxn_has_partial_change(txn)
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
#define LOGICAL_REWRITE_FORMAT
Definition rewriteheap.h:54
MemoryContext SlabContextCreate(MemoryContext parent, const char *name, Size blockSize, Size chunkSize)
Definition slab.c:322
ReplicationSlot * MyReplicationSlot
Definition slot.c:148
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
Definition slot.c:267
#define PG_REPLSLOT_DIR
Definition slot.h:21
void SnapBuildSnapDecRefcount(Snapshot snap)
Definition snapbuild.c:328
bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
Definition snapbuild.c:304
SnapBuildState SnapBuildCurrentState(SnapBuild *builder)
Definition snapbuild.c:277
@ SNAPBUILD_CONSISTENT
Definition snapbuild.h:50
void TeardownHistoricSnapshot(bool is_error)
Definition snapmgr.c:1685
void SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
Definition snapmgr.c:1669
static HTAB * tuplecid_data
Definition snapmgr.c:163
struct SnapshotData * Snapshot
Definition snapshot.h:117
#define free(a)
bool attisdropped
Definition tupdesc.h:77
Definition dirent.c:26
Size keysize
Definition hsearch.h:75
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
Definition pg_list.h:54
XLogReaderState * reader
Definition logical.h:42
struct SnapBuild * snapshot_builder
Definition logical.h:44
ItemPointerData new_tid
Definition rewriteheap.h:40
RelFileLocator old_locator
Definition rewriteheap.h:37
ItemPointerData old_tid
Definition rewriteheap.h:39
RelFileLocator new_locator
Definition rewriteheap.h:38
RelFileNumber relNumber
Form_pg_class rd_rel
Definition rel.h:111
ReorderBufferChangeType action
struct ReorderBufferChange::@117::@119 truncate
struct ReorderBufferChange::@117::@121 tuplecid
RelFileLocator rlocator
ItemPointerData tid
union ReorderBufferChange::@117 data
struct ReorderBufferChange::@117::@122 inval
struct ReorderBufferChange::@117::@120 msg
struct ReorderBufferTXN * txn
RelFileLocator locator
ReplOriginId origin_id
SharedInvalidationMessage * invalidations
struct ReorderBufferChange::@117::@118 tp
ReorderBufferChange change
ReorderBufferChange * change
ReorderBufferTXN * txn
XLogRecPtr restart_decoding_lsn
pairingheap_node txn_node
TimestampTz commit_time
XLogRecPtr base_snapshot_lsn
TransactionId toplevel_xid
dlist_node catchange_node
SharedInvalidationMessage * invalidations
dlist_head tuplecids
XLogRecPtr first_lsn
TimestampTz abort_time
XLogRecPtr final_lsn
void * output_plugin_private
uint32 ninvalidations_distributed
XLogRecPtr origin_lsn
TimestampTz prepare_time
TransactionId xid
ReplOriginId origin_id
dlist_node base_snapshot_node
SharedInvalidationMessage * invalidations_distributed
dlist_head txns_by_base_snapshot_lsn
MemoryContext context
dclist_head catchange_txns
MemoryContext change_context
ReorderBufferTXN * by_txn_last_txn
TransactionId by_txn_last_xid
MemoryContext tup_context
dlist_head toplevel_by_lsn
pairingheap * txn_heap
MemoryContext txn_context
XLogRecPtr current_restart_decoding_lsn
ReplicationSlotPersistentData data
Definition slot.h:213
char fname[MAXPGPATH]
TransactionId xmin
Definition snapshot.h:153
int32 subxcnt
Definition snapshot.h:177
CommandId curcid
Definition snapshot.h:183
uint32 xcnt
Definition snapshot.h:165
TransactionId * subxip
Definition snapshot.h:176
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
int32 va_rawsize
Definition varatt.h:34
Definition c.h:739
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:175
#define VARHDRSZ_SHORT
Definition varatt.h:278
static bool VARATT_IS_SHORT(const void *PTR)
Definition varatt.h:403
static void SET_VARSIZE_COMPRESSED(void *PTR, Size len)
Definition varatt.h:446
static bool VARATT_IS_EXTENDED(const void *PTR)
Definition varatt.h:410
static bool VARATT_IS_EXTERNAL(const void *PTR)
Definition varatt.h:354
static char * VARDATA_EXTERNAL(const void *PTR)
Definition varatt.h:340
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARTAG_EXTERNAL(void *PTR, vartag_external tag)
Definition varatt.h:453
static Size VARATT_EXTERNAL_GET_EXTSIZE(varatt_external toast_pointer)
Definition varatt.h:507
@ VARTAG_INDIRECT
Definition varatt.h:86
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static bool VARATT_EXTERNAL_IS_COMPRESSED(varatt_external toast_pointer)
Definition varatt.h:536
static Size VARSIZE_SHORT(const void *PTR)
Definition varatt.h:312
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
#define lstat(path, sb)
Definition win32_port.h:275
#define S_ISDIR(m)
Definition win32_port.h:315
bool IsTransactionOrTransactionBlock(void)
Definition xact.c:5011
void BeginInternalSubTransaction(const char *name)
Definition xact.c:4716
TransactionId CheckXidAlive
Definition xact.c:100
void RollbackAndReleaseCurrentSubTransaction(void)
Definition xact.c:4818
void StartTransactionCommand(void)
Definition xact.c:3080
TransactionId GetCurrentTransactionIdIfAny(void)
Definition xact.c:472
TransactionId GetCurrentTransactionId(void)
Definition xact.c:455
void AbortCurrentTransaction(void)
Definition xact.c:3472
int xidComparator(const void *arg1, const void *arg2)
Definition xid.c:152
int wal_segment_size
Definition xlog.c:146
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 ReplOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52

◆ IsSpecConfirmOrAbort

#define IsSpecConfirmOrAbort (   action)
Value:

Definition at line 201 of file reorderbuffer.c.

◆ IsSpecInsert

#define IsSpecInsert (   action)
Value:

Definition at line 197 of file reorderbuffer.c.

◆ MAX_DISTR_INVAL_MSG_PER_TXN

#define MAX_DISTR_INVAL_MSG_PER_TXN    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))

Definition at line 125 of file reorderbuffer.c.

Typedef Documentation

◆ ReorderBufferDiskChange

◆ ReorderBufferIterTXNEntry

◆ ReorderBufferIterTXNState

◆ ReorderBufferToastEnt

◆ ReorderBufferTupleCidEnt

◆ ReorderBufferTupleCidKey

◆ ReorderBufferTXNByIdEnt

◆ RewriteMappingFile

◆ TXNEntryFile

Function Documentation

◆ ApplyLogicalMappingFile()

static void ApplyLogicalMappingFile ( HTAB tuplecid_data,
const char fname 
)
static

Definition at line 5365 of file reorderbuffer.c.

5366{
5367 char path[MAXPGPATH];
5368 int fd;
5369 int readBytes;
5371
5372 sprintf(path, "%s/%s", PG_LOGICAL_MAPPINGS_DIR, fname);
5374 if (fd < 0)
5375 ereport(ERROR,
5377 errmsg("could not open file \"%s\": %m", path)));
5378
5379 while (true)
5380 {
5384 bool found;
5385
5386 /* be careful about padding */
5387 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
5388
5389 /* read all mappings till the end of the file */
5391 readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
5393
5394 if (readBytes < 0)
5395 ereport(ERROR,
5397 errmsg("could not read file \"%s\": %m",
5398 path)));
5399 else if (readBytes == 0) /* EOF */
5400 break;
5401 else if (readBytes != sizeof(LogicalRewriteMappingData))
5402 ereport(ERROR,
5404 errmsg("could not read from file \"%s\": read %d instead of %d bytes",
5405 path, readBytes,
5406 (int32) sizeof(LogicalRewriteMappingData))));
5407
5408 key.rlocator = map.old_locator;
5410 &key.tid);
5411
5412
5415
5416 /* no existing mapping, no need to update */
5417 if (!ent)
5418 continue;
5419
5420 key.rlocator = map.new_locator;
5422 &key.tid);
5423
5425 hash_search(tuplecid_data, &key, HASH_ENTER, &found);
5426
5427 if (found)
5428 {
5429 /*
5430 * Make sure the existing mapping makes sense. We sometimes update
5431 * old records that did not yet have a cmax (e.g. pg_class' own
5432 * entry while rewriting it) during rewrites, so allow that.
5433 */
5434 Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
5435 Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
5436 }
5437 else
5438 {
5439 /* update mapping */
5440 new_ent->cmin = ent->cmin;
5441 new_ent->cmax = ent->cmax;
5442 new_ent->combocid = ent->combocid;
5443 }
5444 }
5445
5446 if (CloseTransientFile(fd) != 0)
5447 ereport(ERROR,
5449 errmsg("could not close file \"%s\": %m", path)));
5450}

References Assert, CloseTransientFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), HASH_ENTER, HASH_FIND, hash_search(), InvalidCommandId, ItemPointerCopy(), MAXPGPATH, LogicalRewriteMappingData::new_locator, LogicalRewriteMappingData::new_tid, LogicalRewriteMappingData::old_locator, LogicalRewriteMappingData::old_tid, OpenTransientFile(), PG_BINARY, PG_LOGICAL_MAPPINGS_DIR, pgstat_report_wait_end(), pgstat_report_wait_start(), read, sprintf, and tuplecid_data.

Referenced by UpdateLogicalMappings().

◆ AssertChangeLsnOrder()

static void AssertChangeLsnOrder ( ReorderBufferTXN txn)
static

Definition at line 1013 of file reorderbuffer.c.

1014{
1015#ifdef USE_ASSERT_CHECKING
1016 dlist_iter iter;
1018
1019 dlist_foreach(iter, &txn->changes)
1020 {
1022
1024
1027 Assert(txn->first_lsn <= cur_change->lsn);
1028
1029 if (XLogRecPtrIsValid(txn->end_lsn))
1030 Assert(cur_change->lsn <= txn->end_lsn);
1031
1033
1034 prev_lsn = cur_change->lsn;
1035 }
1036#endif
1037}

References Assert, ReorderBufferTXN::changes, dlist_iter::cur, dlist_container, dlist_foreach, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::first_lsn, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit().

◆ AssertTXNLsnOrder()

static void AssertTXNLsnOrder ( ReorderBuffer rb)
static

Definition at line 942 of file reorderbuffer.c.

943{
944#ifdef USE_ASSERT_CHECKING
945 LogicalDecodingContext *ctx = rb->private_data;
946 dlist_iter iter;
949
950 /*
951 * Skip the verification if we haven't yet reached the LSN at which we start
952 * decoding the contents of transactions, because until we reach the
953 * LSN, we could have transactions that don't have the association between
954 * the top-level transaction and subtransaction yet and consequently have
955 * the same LSN. We don't guarantee this association until we try to
956 * decode the actual contents of transaction. The ordering of the records
957 * prior to the start_decoding_at LSN should have been checked before the
958 * restart.
959 */
961 return;
962
963 dlist_foreach(iter, &rb->toplevel_by_lsn)
964 {
966 iter.cur);
967
968 /* start LSN must be set */
969 Assert(XLogRecPtrIsValid(cur_txn->first_lsn));
970
971 /* If there is an end LSN, it must be higher than start LSN */
972 if (XLogRecPtrIsValid(cur_txn->end_lsn))
973 Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
974
975 /* Current initial LSN must be strictly higher than previous */
978
979 /* known-as-subtxn txns must not be listed */
981
982 prev_first_lsn = cur_txn->first_lsn;
983 }
984
985 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
986 {
988 base_snapshot_node,
989 iter.cur);
990
991 /* base snapshot (and its LSN) must be set */
992 Assert(cur_txn->base_snapshot != NULL);
993 Assert(XLogRecPtrIsValid(cur_txn->base_snapshot_lsn));
994
995 /* current LSN must be strictly higher than previous */
997 Assert(prev_base_snap_lsn < cur_txn->base_snapshot_lsn);
998
999 /* known-as-subtxn txns must not be listed */
1001
1002 prev_base_snap_lsn = cur_txn->base_snapshot_lsn;
1003 }
1004#endif
1005}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, XLogReaderState::EndRecPtr, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, LogicalDecodingContext::reader, SnapBuildXactNeedsSkip(), LogicalDecodingContext::snapshot_builder, and XLogRecPtrIsValid.

Referenced by ReorderBufferAssignChild(), ReorderBufferGetOldestTXN(), ReorderBufferGetOldestXmin(), ReorderBufferSetBaseSnapshot(), and ReorderBufferTXNByXid().

◆ file_sort_by_lsn()

static int file_sort_by_lsn ( const ListCell a_p,
const ListCell b_p 
)
static

Definition at line 5467 of file reorderbuffer.c.

5468{
5471
5472 return pg_cmp_u64(a->lsn, b->lsn);
5473}

References a, b, fb(), lfirst, and pg_cmp_u64().

Referenced by UpdateLogicalMappings().

◆ ReorderBufferAbort()

void ReorderBufferAbort ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
TimestampTz  abort_time 
)

Definition at line 3088 of file reorderbuffer.c.

3090{
3091 ReorderBufferTXN *txn;
3092
3093 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3094 false);
3095
3096 /* unknown, nothing to remove */
3097 if (txn == NULL)
3098 return;
3099
3100 txn->abort_time = abort_time;
3101
3102 /* For streamed transactions notify the remote node about the abort. */
3103 if (rbtxn_is_streamed(txn))
3104 {
3105 rb->stream_abort(rb, txn, lsn);
3106
3107 /*
3108 * We might have decoded changes for this transaction that could load
3109 * the cache as per the current transaction's view (consider DDL that
3110 * happened in this transaction). We don't want the decoding of future
3111 * transactions to use those cache entries so execute only the inval
3112 * messages in this transaction.
3113 */
3114 if (txn->ninvalidations > 0)
3116 txn->invalidations);
3117 }
3118
3119 /* cosmetic... */
3120 txn->final_lsn = lsn;
3121
3122 /* remove potential on-disk data, and deallocate */
3124}

References ReorderBufferTXN::abort_time, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeAbort().

◆ ReorderBufferAbortOld()

void ReorderBufferAbortOld ( ReorderBuffer rb,
TransactionId  oldestRunningXid 
)

Definition at line 3134 of file reorderbuffer.c.

3135{
3137
3138 /*
3139 * Iterate through all (potential) toplevel TXNs and abort all that are
3140 * older than what possibly can be running. Once we've found the first
3141 * that is alive we stop; there might be some that acquired an xid earlier
3142 * but started writing later, but it's unlikely and they will be cleaned
3143 * up in a later call to this function.
3144 */
3145 dlist_foreach_modify(it, &rb->toplevel_by_lsn)
3146 {
3147 ReorderBufferTXN *txn;
3148
3149 txn = dlist_container(ReorderBufferTXN, node, it.cur);
3150
3151 if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
3152 {
3153 elog(DEBUG2, "aborting old transaction %u", txn->xid);
3154
3155 /* Notify the remote node about the crash/immediate restart. */
3156 if (rbtxn_is_streamed(txn))
3157 rb->stream_abort(rb, txn, InvalidXLogRecPtr);
3158
3159 /* remove potential on-disk data, and deallocate this tx */
3161 }
3162 else
3163 return;
3164 }
3165}

References DEBUG2, dlist_container, dlist_foreach_modify, elog, fb(), InvalidXLogRecPtr, rbtxn_is_streamed, ReorderBufferCleanupTXN(), TransactionIdPrecedes(), and ReorderBufferTXN::xid.

Referenced by standby_decode().

◆ ReorderBufferAccumulateInvalidations()

static void ReorderBufferAccumulateInvalidations ( SharedInvalidationMessage **  invals_out,
uint32 ninvals_out,
SharedInvalidationMessage msgs_new,
Size  nmsgs_new 
)
static

Definition at line 3506 of file reorderbuffer.c.

3510{
3511 if (*ninvals_out == 0)
3512 {
3516 }
3517 else
3518 {
3519 /* Enlarge the array of inval messages */
3522 (*ninvals_out + nmsgs_new));
3526 }
3527}

References fb(), palloc_array, and repalloc().

Referenced by ReorderBufferAddDistributedInvalidations(), and ReorderBufferAddInvalidations().

◆ ReorderBufferAddDistributedInvalidations()

void ReorderBufferAddDistributedInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3584 of file reorderbuffer.c.

3587{
3588 ReorderBufferTXN *txn;
3589 MemoryContext oldcontext;
3590
3591 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3592
3593 oldcontext = MemoryContextSwitchTo(rb->context);
3594
3595 /*
3596 * Collect all the invalidations under the top transaction, if available,
3597 * so that we can execute them all together. See comments atop
3598 * ReorderBufferAddInvalidations.
3599 */
3600 txn = rbtxn_get_toptxn(txn);
3601
3602 Assert(nmsgs > 0);
3603
3605 {
3606 /*
3607 * Check the transaction has enough space for storing distributed
3608 * invalidation messages.
3609 */
3611 {
3612 /*
3613 * Mark the invalidation message as overflowed and free up the
3614 * messages accumulated so far.
3615 */
3617
3619 {
3623 }
3624 }
3625 else
3628 msgs, nmsgs);
3629 }
3630
3631 /* Queue the invalidation messages into the transaction */
3632 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3633
3634 MemoryContextSwitchTo(oldcontext);
3635}

References Assert, fb(), ReorderBufferTXN::invalidations_distributed, MAX_DISTR_INVAL_MSG_PER_TXN, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations_distributed, pfree(), RBTXN_DISTR_INVAL_OVERFLOWED, rbtxn_distr_inval_overflowed, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferAddInvalidations()

void ReorderBufferAddInvalidations ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
Size  nmsgs,
SharedInvalidationMessage msgs 
)

Definition at line 3543 of file reorderbuffer.c.

3546{
3547 ReorderBufferTXN *txn;
3548 MemoryContext oldcontext;
3549
3550 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3551
3552 oldcontext = MemoryContextSwitchTo(rb->context);
3553
3554 /*
3555 * Collect all the invalidations under the top transaction, if available,
3556 * so that we can execute them all together. See comments atop this
3557 * function.
3558 */
3559 txn = rbtxn_get_toptxn(txn);
3560
3561 Assert(nmsgs > 0);
3562
3564 &txn->ninvalidations,
3565 msgs, nmsgs);
3566
3567 ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
3568
3569 MemoryContextSwitchTo(oldcontext);
3570}

References Assert, fb(), ReorderBufferTXN::invalidations, MemoryContextSwitchTo(), ReorderBufferTXN::ninvalidations, rbtxn_get_toptxn, ReorderBufferAccumulateInvalidations(), ReorderBufferQueueInvalidations(), and ReorderBufferTXNByXid().

Referenced by xact_decode().

◆ ReorderBufferAddNewCommandId()

◆ ReorderBufferAddNewTupleCids()

◆ ReorderBufferAddSnapshot()

◆ ReorderBufferAllocate()

ReorderBuffer * ReorderBufferAllocate ( void  )

Definition at line 324 of file reorderbuffer.c.

325{
326 ReorderBuffer *buffer;
329
331
332 /* allocate memory in own context, to have better accountability */
334 "ReorderBuffer",
336
337 buffer =
339
340 memset(&hash_ctl, 0, sizeof(hash_ctl));
341
342 buffer->context = new_ctx;
343
345 "Change",
347 sizeof(ReorderBufferChange));
348
350 "TXN",
352 sizeof(ReorderBufferTXN));
353
354 /*
355 * To minimize memory fragmentation caused by long-running transactions
356 * with changes spanning multiple memory blocks, we use a single
357 * fixed-size memory block for decoded tuple storage. The performance
358 * testing showed that the default memory block size maintains logical
359 * decoding performance without causing fragmentation due to concurrent
360 * transactions. One might think that we can use the max size as
361 * SLAB_LARGE_BLOCK_SIZE but the test also showed it doesn't help resolve
362 * the memory fragmentation.
363 */
365 "Tuples",
369
370 hash_ctl.keysize = sizeof(TransactionId);
371 hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
372 hash_ctl.hcxt = buffer->context;
373
374 buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
376
378 buffer->by_txn_last_txn = NULL;
379
380 buffer->outbuf = NULL;
381 buffer->outbufsize = 0;
382 buffer->size = 0;
383
384 /* txn_heap is ordered by transaction size */
386
387 buffer->spillTxns = 0;
388 buffer->spillCount = 0;
389 buffer->spillBytes = 0;
390 buffer->streamTxns = 0;
391 buffer->streamCount = 0;
392 buffer->streamBytes = 0;
393 buffer->memExceededCount = 0;
394 buffer->totalTxns = 0;
395 buffer->totalBytes = 0;
396
398
399 dlist_init(&buffer->toplevel_by_lsn);
401 dclist_init(&buffer->catchange_txns);
402
403 /*
404 * Ensure there's no stale data from prior uses of this slot, in case some
405 * prior exit avoided calling ReorderBufferFree. Failure to do this can
406 * produce duplicated txns, and it's very cheap if there's nothing there.
407 */
409
410 return buffer;
411}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ReorderBuffer::by_txn, ReorderBuffer::by_txn_last_txn, ReorderBuffer::by_txn_last_xid, ReorderBuffer::catchange_txns, ReorderBuffer::change_context, ReorderBuffer::context, ReorderBuffer::current_restart_decoding_lsn, CurrentMemoryContext, ReplicationSlot::data, dclist_init(), dlist_init(), fb(), GenerationContextCreate(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, InvalidTransactionId, InvalidXLogRecPtr, ReorderBuffer::memExceededCount, MemoryContextAlloc(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, ReorderBuffer::outbuf, ReorderBuffer::outbufsize, pairingheap_allocate(), ReorderBufferCleanupSerializedTXNs(), ReorderBufferTXNSizeCompare(), ReorderBuffer::size, SLAB_DEFAULT_BLOCK_SIZE, SlabContextCreate(), ReorderBuffer::spillBytes, ReorderBuffer::spillCount, ReorderBuffer::spillTxns, ReorderBuffer::streamBytes, ReorderBuffer::streamCount, ReorderBuffer::streamTxns, ReorderBuffer::toplevel_by_lsn, ReorderBuffer::totalBytes, ReorderBuffer::totalTxns, ReorderBuffer::tup_context, ReorderBuffer::txn_context, ReorderBuffer::txn_heap, and ReorderBuffer::txns_by_base_snapshot_lsn.

Referenced by StartupDecodingContext().

◆ ReorderBufferAllocChange()

◆ ReorderBufferAllocRelids()

Oid * ReorderBufferAllocRelids ( ReorderBuffer rb,
int  nrelids 
)

Definition at line 625 of file reorderbuffer.c.

626{
627 Oid *relids;
629
630 alloc_len = sizeof(Oid) * nrelids;
631
632 relids = (Oid *) MemoryContextAlloc(rb->context, alloc_len);
633
634 return relids;
635}

References fb(), and MemoryContextAlloc().

Referenced by DecodeTruncate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTupleBuf()

HeapTuple ReorderBufferAllocTupleBuf ( ReorderBuffer rb,
Size  tuple_len 
)

Definition at line 592 of file reorderbuffer.c.

593{
594 HeapTuple tuple;
596
597 alloc_len = tuple_len + SizeofHeapTupleHeader;
598
599 tuple = (HeapTuple) MemoryContextAlloc(rb->tup_context,
601 tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
602
603 return tuple;
604}

References fb(), HEAPTUPLESIZE, MemoryContextAlloc(), SizeofHeapTupleHeader, and HeapTupleData::t_data.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeUpdate(), and ReorderBufferRestoreChange().

◆ ReorderBufferAllocTXN()

static ReorderBufferTXN * ReorderBufferAllocTXN ( ReorderBuffer rb)
static

Definition at line 435 of file reorderbuffer.c.

436{
437 ReorderBufferTXN *txn;
438
439 txn = (ReorderBufferTXN *)
440 MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
441
442 memset(txn, 0, sizeof(ReorderBufferTXN));
443
444 dlist_init(&txn->changes);
445 dlist_init(&txn->tuplecids);
446 dlist_init(&txn->subtxns);
447
448 /* InvalidCommandId is not zero, so set it explicitly */
451
452 return txn;
453}

References ReorderBufferTXN::changes, ReorderBufferTXN::command_id, dlist_init(), fb(), InvalidCommandId, MemoryContextAlloc(), ReorderBufferTXN::output_plugin_private, ReorderBufferTXN::subtxns, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferTXNByXid().

◆ ReorderBufferApplyChange()

static void ReorderBufferApplyChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
Relation  relation,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2072 of file reorderbuffer.c.

2075{
2076 if (streaming)
2077 rb->stream_change(rb, txn, relation, change);
2078 else
2079 rb->apply_change(rb, txn, relation, change);
2080}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyMessage()

static void ReorderBufferApplyMessage ( ReorderBuffer rb,
ReorderBufferTXN txn,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2100 of file reorderbuffer.c.

2102{
2103 if (streaming)
2104 rb->stream_message(rb, txn, change->lsn, true,
2105 change->data.msg.prefix,
2106 change->data.msg.message_size,
2107 change->data.msg.message);
2108 else
2109 rb->message(rb, txn, change->lsn, true,
2110 change->data.msg.prefix,
2111 change->data.msg.message_size,
2112 change->data.msg.message);
2113}

References ReorderBufferChange::data, fb(), ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, and ReorderBufferChange::prefix.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferApplyTruncate()

static void ReorderBufferApplyTruncate ( ReorderBuffer rb,
ReorderBufferTXN txn,
int  nrelations,
Relation relations,
ReorderBufferChange change,
bool  streaming 
)
inlinestatic

Definition at line 2086 of file reorderbuffer.c.

2089{
2090 if (streaming)
2091 rb->stream_truncate(rb, txn, nrelations, relations, change);
2092 else
2093 rb->apply_truncate(rb, txn, nrelations, relations, change);
2094}

References fb().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferAssignChild()

void ReorderBufferAssignChild ( ReorderBuffer rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  lsn 
)

Definition at line 1099 of file reorderbuffer.c.

1101{
1102 ReorderBufferTXN *txn;
1103 ReorderBufferTXN *subtxn;
1104 bool new_top;
1105 bool new_sub;
1106
1107 txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
1108 subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
1109
1110 if (!new_sub)
1111 {
1112 if (rbtxn_is_known_subxact(subtxn))
1113 {
1114 /* already associated, nothing to do */
1115 return;
1116 }
1117 else
1118 {
1119 /*
1120 * We already saw this transaction, but initially added it to the
1121 * list of top-level txns. Now that we know it's not top-level,
1122 * remove it from there.
1123 */
1124 dlist_delete(&subtxn->node);
1125 }
1126 }
1127
1128 subtxn->txn_flags |= RBTXN_IS_SUBXACT;
1129 subtxn->toplevel_xid = xid;
1130 Assert(subtxn->nsubtxns == 0);
1131
1132 /* set the reference to top-level transaction */
1133 subtxn->toptxn = txn;
1134
1135 /* add to subtransaction list */
1136 dlist_push_tail(&txn->subtxns, &subtxn->node);
1137 txn->nsubtxns++;
1138
1139 /* Possibly transfer the subtxn's snapshot to its top-level txn. */
1140 ReorderBufferTransferSnapToParent(txn, subtxn);
1141
1142 /* Verify LSN-ordering invariant */
1143 AssertTXNLsnOrder(rb);
1144}

References Assert, AssertTXNLsnOrder(), dlist_delete(), dlist_push_tail(), fb(), ReorderBufferTXN::nsubtxns, rbtxn_is_known_subxact, RBTXN_IS_SUBXACT, ReorderBufferTransferSnapToParent(), ReorderBufferTXNByXid(), and ReorderBufferTXN::subtxns.

Referenced by LogicalDecodingProcessRecord(), and ReorderBufferCommitChild().

◆ ReorderBufferBuildTupleCidHash()

static void ReorderBufferBuildTupleCidHash ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1836 of file reorderbuffer.c.

1837{
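1838 dlist_iter iter;
1839 HASHCTL hash_ctl;
1840
1841 if (!rbtxn_has_catalog_changes(txn) || dlist_is_empty(&txn->tuplecids))
1842 return;
1843
1844 hash_ctl.keysize = sizeof(ReorderBufferTupleCidKey);
1845 hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);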
1846 hash_ctl.hcxt = rb->context;
1847
1848 /*
1849 * create the hash with the exact number of to-be-stored tuplecids from
1850 * the start
1851 */
1852 txn->tuplecid_hash =
1853 hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
1854 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1855
1856 dlist_foreach(iter, &txn->tuplecids)
1857 {
1858 ReorderBufferTupleCidKey key;
1859 ReorderBufferTupleCidEnt *ent;
1860 bool found;
1861 ReorderBufferChange *change;
1862
1863 change = dlist_container(ReorderBufferChange, node, iter.cur);
1864
1865 Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
1866
1867 /* be careful about padding */
1868 memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
1869
1870 key.rlocator = change->data.tuplecid.locator;
1871
1872 ItemPointerCopy(&change->data.tuplecid.tid,
1873 &key.tid);
1874
1875 ent = (ReorderBufferTupleCidEnt *)
1876 hash_search(txn->tuplecid_hash, &key, HASH_ENTER, &found);
1877 if (!found)
1878 {
1879 ent->cmin = change->data.tuplecid.cmin;
1880 ent->cmax = change->data.tuplecid.cmax;
1881 ent->combocid = change->data.tuplecid.combocid;
1882 }
1883 else
1884 {
1885 /*
1886 * Maybe we already saw this tuple before in this transaction, but
1887 * if so it must have the same cmin.
1888 */
1889 Assert(ent->cmin == change->data.tuplecid.cmin);
1890
1891 /*
1892 * cmax may be initially invalid, but once set it can only grow,
1893 * and never become invalid again.
1894 */
1895 Assert((ent->cmax == InvalidCommandId) ||
1896 ((change->data.tuplecid.cmax != InvalidCommandId) &&
1897 (change->data.tuplecid.cmax > ent->cmax)));
1898 ent->cmax = change->data.tuplecid.cmax;
1899 }
1900 }
1901}

References ReorderBufferChange::action, Assert, ReorderBufferChange::cmax, ReorderBufferChange::cmin, ReorderBufferChange::combocid, dlist_iter::cur, ReorderBufferChange::data, dlist_container, dlist_foreach, dlist_is_empty(), fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), InvalidCommandId, ItemPointerCopy(), HASHCTL::keysize, ReorderBufferChange::locator, ReorderBufferTXN::ntuplecids, rbtxn_has_catalog_changes, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChange::tid, ReorderBufferChange::tuplecid, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::tuplecids.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferCanStartStreaming()

static bool ReorderBufferCanStartStreaming ( ReorderBuffer * rb)
inline static

Definition at line 4317 of file reorderbuffer.c.

4318{
4319 LogicalDecodingContext *ctx = rb->private_data;
4320 SnapBuild *builder = ctx->snapshot_builder;
4321
4322 /* We can't start streaming unless a consistent state is reached. */
4323 if (SnapBuildCurrentState(builder) < SNAPBUILD_CONSISTENT)
4324 return false;
4325
4326 /*
4327 * We can't start streaming immediately even if the streaming is enabled
4328 * because we previously decoded this transaction and now just are
4329 * restarting.
4330 */
4331 if (ReorderBufferCanStream(rb) &&
4332 !SnapBuildXactNeedsSkip(builder, ctx->reader->ReadRecPtr))
4333 return true;
4334
4335 return false;
4336}

References fb(), LogicalDecodingContext::reader, XLogReaderState::ReadRecPtr, ReorderBufferCanStream(), SNAPBUILD_CONSISTENT, SnapBuildCurrentState(), SnapBuildXactNeedsSkip(), and LogicalDecodingContext::snapshot_builder.

Referenced by ReorderBufferCheckMemoryLimit(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferCanStream()

static bool ReorderBufferCanStream ( ReorderBuffer * rb)
inline static

Definition at line 4308 of file reorderbuffer.c.

4309{
4310 LogicalDecodingContext *ctx = rb->private_data;
4311
4312 return ctx->streaming;
4313}

References fb(), and LogicalDecodingContext::streaming.

Referenced by ReorderBufferCanStartStreaming(), and ReorderBufferProcessPartialChange().

◆ ReorderBufferChangeMemoryUpdate()

static void ReorderBufferChangeMemoryUpdate ( ReorderBuffer rb,
ReorderBufferChange change,
ReorderBufferTXN txn,
bool  addition,
Size  sz 
)
static

Definition at line 3386 of file reorderbuffer.c.

3390{
3391 ReorderBufferTXN *toptxn;
3392
3393 Assert(txn || change);
3394
3395 /*
3396 * Ignore tuple CID changes, because those are not evicted when reaching
3397 * memory limit. So we just don't count them, because it might easily
3398 * trigger a pointless attempt to spill.
3399 */
3400 if (change && change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
3401 return;
3402
3403 if (sz == 0)
3404 return;
3405
3406 if (txn == NULL)
3407 txn = change->txn;
3408 Assert(txn != NULL);
3409
3410 /*
3411 * Update the total size in top level as well. This is later used to
3412 * compute the decoding stats.
3413 */
3414 toptxn = rbtxn_get_toptxn(txn);
3415
3416 if (addition)
3417 {
3418 Size oldsize = txn->size;
3419
3420 txn->size += sz;
3421 rb->size += sz;
3422
3423 /* Update the total size in the top transaction. */
3424 toptxn->total_size += sz;
3425
3426 /* Update the max-heap */
3427 if (oldsize != 0)
3428 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3429 pairingheap_add(rb->txn_heap, &txn->txn_node);
3430 }
3431 else
3432 {
3433 Assert((rb->size >= sz) && (txn->size >= sz));
3434 txn->size -= sz;
3435 rb->size -= sz;
3436
3437 /* Update the total size in the top transaction. */
3438 toptxn->total_size -= sz;
3439
3440 /* Update the max-heap */
3441 pairingheap_remove(rb->txn_heap, &txn->txn_node);
3442 if (txn->size != 0)
3443 pairingheap_add(rb->txn_heap, &txn->txn_node);
3444 }
3445
3446 Assert(txn->size <= rb->size);
3447}

References ReorderBufferChange::action, Assert, fb(), pairingheap_add(), pairingheap_remove(), rbtxn_get_toptxn, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferTXN::size, ReorderBufferTXN::total_size, ReorderBufferChange::txn, and ReorderBufferTXN::txn_node.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferSerializeTXN(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferChangeSize()

static Size ReorderBufferChangeSize ( ReorderBufferChange change)
static

Definition at line 4460 of file reorderbuffer.c.

4461{
4462 Size sz = sizeof(ReorderBufferChange);
4463
4464 switch (change->action)
4465 {
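4466 /* fall through these, they're all similar enough */
4467 case REORDER_BUFFER_CHANGE_INSERT:
4468 case REORDER_BUFFER_CHANGE_UPDATE:
4469 case REORDER_BUFFER_CHANGE_DELETE:
4470 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
4471 {
4472 HeapTuple oldtup,
4473 newtup;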
4474 Size oldlen = 0;
4475 Size newlen = 0;
4476
4477 oldtup = change->data.tp.oldtuple;
4478 newtup = change->data.tp.newtuple;
4479
4480 if (oldtup)
4481 {
4482 sz += sizeof(HeapTupleData);
4483 oldlen = oldtup->t_len;
4484 sz += oldlen;
4485 }
4486
4487 if (newtup)
4488 {
4489 sz += sizeof(HeapTupleData);
4490 newlen = newtup->t_len;
4491 sz += newlen;
4492 }
4493
4494 break;
4495 }
4496 case REORDER_BUFFER_CHANGE_MESSAGE:
4497 {
4498 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4499
4500 sz += prefix_size + change->data.msg.message_size +
4501 sizeof(Size) + sizeof(Size);
4502
4503 break;
4504 }
4505 case REORDER_BUFFER_CHANGE_INVALIDATION:
4506 {
4507 sz += sizeof(SharedInvalidationMessage) *
4508 change->data.inval.ninvalidations;
4509 break;
4510 }
4511 case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
4512 {
4513 Snapshot snap;
4514
4515 snap = change->data.snapshot;
4516
4517 sz += sizeof(SnapshotData) +
4518 sizeof(TransactionId) * snap->xcnt +
4519 sizeof(TransactionId) * snap->subxcnt;
4520
4521 break;
4522 }
4523 case REORDER_BUFFER_CHANGE_TRUNCATE:
4524 {
4525 sz += sizeof(Oid) * change->data.truncate.nrelids;
4526
4527 break;
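4528 }
4529 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
4530 case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
4531 case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
4532 case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
4533 /* ReorderBufferChange contains everything important */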
4534 break;
4535 }
4536
4537 return sz;
4538}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferQueueChange(), ReorderBufferRestoreChange(), ReorderBufferToastReplace(), and ReorderBufferTruncateTXN().

◆ ReorderBufferCheckAndTruncateAbortedTXN()

static bool ReorderBufferCheckAndTruncateAbortedTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1774 of file reorderbuffer.c.

1775{
1776 /* Quick return for regression tests */
1777 if (unlikely(debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE))
1778 return false;
1779
1780 /*
1781 * Quick return if the transaction status is already known.
1782 */
1783
1784 if (rbtxn_is_committed(txn))
1785 return false;
1786 if (rbtxn_is_aborted(txn))
1787 {
1788 /* Already-aborted transactions should not have any changes */
1789 Assert(txn->size == 0);
1790
1791 return true;
1792 }
1793
1794 /* Otherwise, check the transaction status using CLOG lookup */
1795
1796 if (TransactionIdIsInProgress(txn->xid))
1797 return false;
1798
1799 if (TransactionIdDidCommit(txn->xid))
1800 {
1801 /*
1802 * Remember the transaction is committed so that we can skip CLOG
1803 * check next time, avoiding the pressure on CLOG lookup.
1804 */
1805 Assert(!rbtxn_is_aborted(txn));
1806 txn->txn_flags |= RBTXN_IS_COMMITTED;
1807 return false;
1808 }
1809
1810 /*
1811 * The transaction aborted. We discard both the changes collected so far
1812 * and the toast reconstruction data. The full cleanup will happen as part
1813 * of decoding ABORT record of this transaction.
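1814 */
1815 ReorderBufferTruncateTXN(rb, txn, rbtxn_is_prepared(txn));
1816 ReorderBufferToastReset(rb, txn);
1817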
1818 /* All changes should be discarded */
1819 Assert(txn->size == 0);
1820
1821 /*
1822 * Mark the transaction as aborted so we can ignore future changes of this
1823 * transaction.
1824 */
1825 Assert(!rbtxn_is_committed(txn));
1826 txn->txn_flags |= RBTXN_IS_ABORTED;
1827
1828 return true;
1829}

References Assert, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), RBTXN_IS_ABORTED, rbtxn_is_aborted, RBTXN_IS_COMMITTED, rbtxn_is_committed, rbtxn_is_prepared, ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferTXN::size, TransactionIdDidCommit(), TransactionIdIsInProgress(), ReorderBufferTXN::txn_flags, unlikely, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferCheckMemoryLimit()

static void ReorderBufferCheckMemoryLimit ( ReorderBuffer rb)
static

Definition at line 3897 of file reorderbuffer.c.

3898{
3899 ReorderBufferTXN *txn;
3900 bool update_stats = true;
3901
3902 if (rb->size >= logical_decoding_work_mem * (Size) 1024)
3903 {
3904 /*
3905 * Update the statistics as the memory usage has reached the limit. We
3906 * report the statistics update later in this function since we can
3907 * update the slot statistics altogether while streaming or
3908 * serializing transactions in most cases.
3909 */
3910 rb->memExceededCount += 1;
3911 }
3913 {
3914 /*
3915 * Bail out if debug_logical_replication_streaming is buffered and we
3916 * haven't exceeded the memory limit.
3917 */
3918 return;
3919 }
3920
3921 /*
3922 * If debug_logical_replication_streaming is immediate, loop until there's
3923 * no change. Otherwise, loop until we reach under the memory limit. One
3924 * might think that just by evicting the largest (sub)transaction we will
3925 * come under the memory limit based on assumption that the selected
3926 * transaction is at least as large as the most recent change (which
3927 * caused us to go over the memory limit). However, that is not true
3928 * because a user can reduce the logical_decoding_work_mem to a smaller
3929 * value before the most recent change.
3930 */
3931 while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
3933 rb->size > 0))
3934 {
3935 /*
3936 * Pick the largest non-aborted transaction and evict it from memory
3937 * by streaming, if possible. Otherwise, spill to disk.
3938 */
3941 {
3942 /* we know there has to be one, because the size is not zero */
3943 Assert(txn && rbtxn_is_toptxn(txn));
3944 Assert(txn->total_size > 0);
3945 Assert(rb->size >= txn->total_size);
3946
3947 /* skip the transaction if aborted */
3949 continue;
3950
3952 }
3953 else
3954 {
3955 /*
3956 * Pick the largest transaction (or subtransaction) and evict it
3957 * from memory by serializing it to disk.
3958 */
3960
3961 /* we know there has to be one, because the size is not zero */
3962 Assert(txn);
3963 Assert(txn->size > 0);
3964 Assert(rb->size >= txn->size);
3965
3966 /* skip the transaction if aborted */
3968 continue;
3969
3971 }
3972
3973 /*
3974 * After eviction, the transaction should have no entries in memory,
3975 * and should use 0 bytes for changes.
3976 */
3977 Assert(txn->size == 0);
3978 Assert(txn->nentries_mem == 0);
3979
3980 /*
3981 * We've reported the memExceededCount update while streaming or
3982 * serializing the transaction.
3983 */
3984 update_stats = false;
3985 }
3986
3987 if (update_stats)
3989
3990 /* We must be under the memory limit now. */
3991 Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
3992}

References Assert, DEBUG_LOGICAL_REP_STREAMING_BUFFERED, DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE, debug_logical_replication_streaming, fb(), logical_decoding_work_mem, ReorderBufferTXN::nentries_mem, rbtxn_is_toptxn, ReorderBufferCanStartStreaming(), ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferLargestStreamableTopTXN(), ReorderBufferLargestTXN(), ReorderBufferSerializeTXN(), ReorderBufferStreamTXN(), ReorderBufferTXN::size, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferCleanupSerializedTXNs()

static void ReorderBufferCleanupSerializedTXNs ( const char * slotname)
static

Definition at line 4885 of file reorderbuffer.c.

4886{
4887 DIR *spill_dir;
4888 struct dirent *spill_de;
4889 struct stat statbuf;
4890 char path[MAXPGPATH * 2 + sizeof(PG_REPLSLOT_DIR)];
4891
4892 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, slotname);
4893
4894 /* we're only handling directories here, skip if it's not ours */
4895 if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
4896 return;
4897
4898 spill_dir = AllocateDir(path);
4899 while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
4900 {
4901 /* only look at names that can be ours */
4902 if (strncmp(spill_de->d_name, "xid", 3) == 0)
4903 {
4904 snprintf(path, sizeof(path),
4905 "%s/%s/%s", PG_REPLSLOT_DIR, slotname,
4906 spill_de->d_name);
4907
4908 if (unlink(path) != 0)
4909 ereport(ERROR,
4910 (errcode_for_file_access(),
4911 errmsg("could not remove file \"%s\" during removal of %s/%s/xid*: %m",
4912 path, PG_REPLSLOT_DIR, slotname)));
4913 }
4914 }
4915 FreeDir(spill_dir);
4916 }

References AllocateDir(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), FreeDir(), INFO, lstat, MAXPGPATH, PG_REPLSLOT_DIR, ReadDirExtended(), S_ISDIR, snprintf, and sprintf.

Referenced by ReorderBufferAllocate(), ReorderBufferFree(), and StartupReorderBuffer().

◆ ReorderBufferCleanupTXN()

static void ReorderBufferCleanupTXN ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 1535 of file reorderbuffer.c.

1536{
1537 bool found;
1538 dlist_mutable_iter iter;
1539 Size mem_freed = 0;
1540
1541 /* cleanup subtransactions & their changes */
1542 dlist_foreach_modify(iter, &txn->subtxns)
1543 {
1545
1547
1548 /*
1549 * Subtransactions are always associated to the toplevel TXN, even if
1550 * they originally were happening inside another subtxn, so we won't
1551 * ever recurse more than one level deep here.
1552 */
1554 Assert(subtxn->nsubtxns == 0);
1555
1557 }
1558
1559 /* cleanup changes in the txn */
1560 dlist_foreach_modify(iter, &txn->changes)
1561 {
1562 ReorderBufferChange *change;
1563
1564 change = dlist_container(ReorderBufferChange, node, iter.cur);
1565
1566 /* Check we're not mixing changes from different transactions. */
1567 Assert(change->txn == txn);
1568
1569 /*
1570 * Instead of updating the memory counter for individual changes, we
1571 * sum up the size of memory to free so we can update the memory
1572 * counter all together below. This saves costs of maintaining the
1573 * max-heap.
1574 */
1576
1577 ReorderBufferFreeChange(rb, change, false);
1578 }
1579
1580 /* Update the memory counter */
1582
1583 /*
1584 * Cleanup the tuplecids we stored for decoding catalog snapshot access.
1585 * They are always stored in the toplevel transaction.
1586 */
1587 dlist_foreach_modify(iter, &txn->tuplecids)
1588 {
1589 ReorderBufferChange *change;
1590
1591 change = dlist_container(ReorderBufferChange, node, iter.cur);
1592
1593 /* Check we're not mixing changes from different transactions. */
1594 Assert(change->txn == txn);
1596
1597 ReorderBufferFreeChange(rb, change, true);
1598 }
1599
1600 /*
1601 * Cleanup the base snapshot, if set.
1602 */
1603 if (txn->base_snapshot != NULL)
1604 {
1607 }
1608
1609 /*
1610 * Cleanup the snapshot for the last streamed run.
1611 */
1612 if (txn->snapshot_now != NULL)
1613 {
1616 }
1617
1618 /*
1619 * Remove TXN from its containing lists.
1620 *
1621 * Note: if txn is known as subxact, we are deleting the TXN from its
1622 * parent's list of known subxacts; this leaves the parent's nsubxacts
1623 * count too high, but we don't care. Otherwise, we are deleting the TXN
1624 * from the LSN-ordered list of toplevel TXNs. We remove the TXN from the
1625 * list of catalog modifying transactions as well.
1626 */
1627 dlist_delete(&txn->node);
1629 dclist_delete_from(&rb->catchange_txns, &txn->catchange_node);
1630
1631 /* now remove reference from buffer */
1632 hash_search(rb->by_txn, &txn->xid, HASH_REMOVE, &found);
1633 Assert(found);
1634
1635 /* remove entries spilled to disk */
1636 if (rbtxn_is_serialized(txn))
1638
1639 /* deallocate */
1641}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_node, ReorderBufferTXN::catchange_node, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dclist_delete_from(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search(), ReorderBufferTXN::node, rbtxn_has_catalog_changes, rbtxn_is_known_subxact, rbtxn_is_serialized, rbtxn_is_streamed, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferFreeTXN(), ReorderBufferRestoreCleanup(), SnapBuildSnapDecRefcount(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::xid.

Referenced by ReorderBufferAbort(), ReorderBufferAbortOld(), ReorderBufferCleanupTXN(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferProcessTXN(), ReorderBufferReplay(), and ReorderBufferStreamCommit().

◆ ReorderBufferCommit()

void ReorderBufferCommit ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2885 of file reorderbuffer.c.

2889{
2890 ReorderBufferTXN *txn;
2891
2892 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2893 false);
2894
2895 /* unknown transaction, nothing to replay */
2896 if (txn == NULL)
2897 return;
2898
2899 ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time,
2900 origin_id, origin_lsn);
2901}

References fb(), InvalidXLogRecPtr, ReorderBufferReplay(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferCommitChild()

void ReorderBufferCommitChild ( ReorderBuffer rb,
TransactionId  xid,
TransactionId  subxid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn 
)

Definition at line 1219 of file reorderbuffer.c.

1222{
1223 ReorderBufferTXN *subtxn;
1224
1225 subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
1226 InvalidXLogRecPtr, false);
1227
1228 /*
1229 * No need to do anything if that subtxn didn't contain any changes
1230 */
1231 if (!subtxn)
1232 return;
1233
1234 subtxn->final_lsn = commit_lsn;
1235 subtxn->end_lsn = end_lsn;
1236
1237 /*
1238 * Assign this subxact as a child of the toplevel xact (no-op if already
1239 * done.)
1240 */
1241 ReorderBufferAssignChild(rb, xid, subxid, InvalidXLogRecPtr);
1242}

References fb(), InvalidXLogRecPtr, ReorderBufferAssignChild(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit(), and DecodePrepare().

◆ ReorderBufferCopySnap()

static Snapshot ReorderBufferCopySnap ( ReorderBuffer rb,
Snapshot  orig_snap,
ReorderBufferTXN txn,
CommandId  cid 
)
static

Definition at line 1909 of file reorderbuffer.c.

1911{
1912 Snapshot snap;
1913 dlist_iter iter;
1914 int i = 0;
1915 Size size;
1916
1917 size = sizeof(SnapshotData) +
1918 sizeof(TransactionId) * orig_snap->xcnt +
1919 sizeof(TransactionId) * (txn->nsubtxns + 1);
1920
1921 snap = MemoryContextAllocZero(rb->context, size);
1922 memcpy(snap, orig_snap, sizeof(SnapshotData));
1923
1924 snap->copied = true;
1925 snap->active_count = 1; /* mark as active so nobody frees it */
1926 snap->regd_count = 0;
1927 snap->xip = (TransactionId *) (snap + 1);
1928
1929 memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
1930
1931 /*
1932 * snap->subxip contains all txids that belong to our transaction which we
1933 * need to check via cmin/cmax. That's why we store the toplevel
1934 * transaction in there as well.
1935 */
1936 snap->subxip = snap->xip + snap->xcnt;
1937 snap->subxip[i++] = txn->xid;
1938
1939 /*
1940 * txn->nsubtxns isn't decreased when subtransactions abort, so count
1941 * manually. Since it's an upper boundary it is safe to use it for the
1942 * allocation above.
1943 */
1944 snap->subxcnt = 1;
1945
1946 dlist_foreach(iter, &txn->subtxns)
1947 {
1948 ReorderBufferTXN *sub_txn;
1949
1950 sub_txn = dlist_container(ReorderBufferTXN, node, iter.cur);
1951 snap->subxip[i++] = sub_txn->xid;
1952 snap->subxcnt++;
1953 }
1954
1955 /* sort so we can bsearch() later */
1956 qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
1957
1958 /* store the specified current CommandId */
1959 snap->curcid = cid;
1960
1961 return snap;
1962}

References dlist_iter::cur, dlist_container, dlist_foreach, fb(), i, MemoryContextAllocZero(), ReorderBufferTXN::nsubtxns, qsort, ReorderBufferTXN::subtxns, ReorderBufferTXN::xid, and xidComparator().

Referenced by ReorderBufferProcessTXN(), ReorderBufferSaveTXNSnapshot(), and ReorderBufferStreamTXN().

◆ ReorderBufferExecuteInvalidations()

static void ReorderBufferExecuteInvalidations ( uint32  nmsgs,
SharedInvalidationMessage * msgs 
)
static

Definition at line 3642 of file reorderbuffer.c.

3643{
3644 int i;
3645
3646 for (i = 0; i < nmsgs; i++)
3647 LocalExecuteInvalidationMessage(&msgs[i]);
3648}

References i, and LocalExecuteInvalidationMessage().

Referenced by ReorderBufferFinishPrepared(), and ReorderBufferProcessTXN().

◆ ReorderBufferFinishPrepared()

void ReorderBufferFinishPrepared ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
XLogRecPtr  two_phase_at,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn,
char * gid,
bool  is_commit 
)

Definition at line 3002 of file reorderbuffer.c.

3007{
3008 ReorderBufferTXN *txn;
3009 XLogRecPtr prepare_end_lsn;
3010 TimestampTz prepare_time;
3011
3012 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, false);
3013
3014 /* unknown transaction, nothing to do */
3015 if (txn == NULL)
3016 return;
3017
3018 /*
3019 * By this time the txn has the prepare record information, remember it to
3020 * be later used for rollback.
3021 */
3022 prepare_end_lsn = txn->end_lsn;
3023 prepare_time = txn->prepare_time;
3024
3025 /* add the gid in the txn */
3026 txn->gid = pstrdup(gid);
3027
3028 /*
3029 * It is possible that this transaction is not decoded at prepare time
3030 * either because by that time we didn't have a consistent snapshot, or
3031 * two_phase was not enabled, or it was decoded earlier but we have
3032 * restarted. We only need to send the prepare if it was not decoded
3033 * earlier. We don't need to decode the xact for aborts if it is not done
3034 * already.
3035 */
3036 if ((txn->final_lsn < two_phase_at) && is_commit)
3037 {
3038 /*
3039 * txn must have been marked as a prepared transaction and skipped but
3040 * not sent a prepare. Also, the prepare info must have been updated
3041 * in txn even if we skip prepare.
3042 */
3046
3047 /*
3048 * By this time the txn has the prepare record information and it is
3049 * important to use that so that downstream gets the accurate
3050 * information. If instead, we have passed commit information here
3051 * then downstream can behave as it has already replayed commit
3052 * prepared after the restart.
3053 */
3054 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
3055 txn->prepare_time, txn->origin_id, txn->origin_lsn);
3056 }
3057
3058 txn->final_lsn = commit_lsn;
3059 txn->end_lsn = end_lsn;
3060 txn->commit_time = commit_time;
3061 txn->origin_id = origin_id;
3062 txn->origin_lsn = origin_lsn;
3063
3064 if (is_commit)
3065 rb->commit_prepared(rb, txn, commit_lsn);
3066 else
3067 rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time);
3068
3069 /* cleanup: make sure there's no cache pollution */
3070 ReorderBufferExecuteInvalidations(txn->ninvalidations,
3071 txn->invalidations);
3072 ReorderBufferCleanupTXN(rb, txn);
3073}

References Assert, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, ReorderBufferTXN::invalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferCleanupTXN(), ReorderBufferExecuteInvalidations(), ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeAbort(), and DecodeCommit().

◆ ReorderBufferForget()

void ReorderBufferForget ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3181 of file reorderbuffer.c.

3182{
3183 ReorderBufferTXN *txn;
3184
3185 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3186 false);
3187
3188 /* unknown, nothing to forget */
3189 if (txn == NULL)
3190 return;
3191
3192 /* this transaction mustn't be streamed */
3193 Assert(!rbtxn_is_streamed(txn));
3194
3195 /* cosmetic... */
3196 txn->final_lsn = lsn;
3197
3198 /*
3199 * Process only cache invalidation messages in this transaction if there
3200 * are any. Even if we're not interested in the transaction's contents, it
3201 * could have manipulated the catalog and we need to update the caches
3202 * according to that.
3203 */
3204 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3205 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3206 txn->invalidations);
3207 else
3208 Assert(txn->ninvalidations == 0);
3209
3210 /* remove potential on-disk data, and deallocate */
3211 ReorderBufferCleanupTXN(rb, txn);
3212}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodeCommit().

◆ ReorderBufferFree()

void ReorderBufferFree ( ReorderBuffer rb)

Definition at line 417 of file reorderbuffer.c.

418{
419 MemoryContext context = rb->context;
420
421 /*
422 * We free separately allocated data by entirely scrapping reorderbuffer's
423 * memory context.
424 */
425 MemoryContextDelete(context);
426
427 /* Free disk space used by unconsumed reorder buffers */
428 ReorderBufferCleanupSerializedTXNs(NameStr(MyReplicationSlot->data.name));
429}

References ReplicationSlot::data, fb(), MemoryContextDelete(), MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, and ReorderBufferCleanupSerializedTXNs().

Referenced by FreeDecodingContext().

◆ ReorderBufferFreeChange()

void ReorderBufferFreeChange ( ReorderBuffer rb,
ReorderBufferChange change,
bool  upd_mem 
)

Definition at line 522 of file reorderbuffer.c.

524{
525 /* update memory accounting info */
526 if (upd_mem)
529
530 /* free contained data */
531 switch (change->action)
532 {
537 if (change->data.tp.newtuple)
538 {
540 change->data.tp.newtuple = NULL;
541 }
542
543 if (change->data.tp.oldtuple)
544 {
546 change->data.tp.oldtuple = NULL;
547 }
548 break;
550 if (change->data.msg.prefix != NULL)
551 pfree(change->data.msg.prefix);
552 change->data.msg.prefix = NULL;
553 if (change->data.msg.message != NULL)
554 pfree(change->data.msg.message);
555 change->data.msg.message = NULL;
556 break;
558 if (change->data.inval.invalidations)
559 pfree(change->data.inval.invalidations);
560 change->data.inval.invalidations = NULL;
561 break;
563 if (change->data.snapshot)
564 {
566 change->data.snapshot = NULL;
567 }
568 break;
569 /* no data in addition to the struct itself */
571 if (change->data.truncate.relids != NULL)
572 {
574 change->data.truncate.relids = NULL;
575 }
576 break;
581 break;
582 }
583
584 pfree(change);
585}

References ReorderBufferChange::action, ReorderBufferChange::data, fb(), ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::message, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::oldtuple, pfree(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeRelids(), ReorderBufferFreeSnap(), ReorderBufferFreeTupleBuf(), ReorderBufferChange::snapshot, ReorderBufferChange::tp, and ReorderBufferChange::truncate.

Referenced by ReorderBufferCleanupTXN(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNNext(), ReorderBufferProcessTXN(), ReorderBufferQueueChange(), ReorderBufferResetTXN(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), ReorderBufferToastReset(), and ReorderBufferTruncateTXN().

◆ ReorderBufferFreeRelids()

void ReorderBufferFreeRelids ( ReorderBuffer * rb,
Oid * relids 
)

Definition at line 641 of file reorderbuffer.c.

642{
643 pfree(relids);
644}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeSnap()

static void ReorderBufferFreeSnap ( ReorderBuffer rb,
Snapshot  snap 
)
static

Definition at line 1968 of file reorderbuffer.c.

1969{
1970 if (snap->copied)
1971 pfree(snap);
1972 else
1973 SnapBuildSnapDecRefcount(snap);
1974}

References fb(), pfree(), and SnapBuildSnapDecRefcount().

Referenced by ReorderBufferCleanupTXN(), ReorderBufferFreeChange(), ReorderBufferProcessTXN(), and ReorderBufferStreamTXN().

◆ ReorderBufferFreeTupleBuf()

void ReorderBufferFreeTupleBuf ( HeapTuple  tuple)

Definition at line 610 of file reorderbuffer.c.

611{
612 pfree(tuple);
613}

References pfree().

Referenced by ReorderBufferFreeChange().

◆ ReorderBufferFreeTXN()

static void ReorderBufferFreeTXN ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 459 of file reorderbuffer.c.

460{
461 /* clean the lookup cache if we were cached (quite likely) */
462 if (rb->by_txn_last_xid == txn->xid)
463 {
464 rb->by_txn_last_xid = InvalidTransactionId;
465 rb->by_txn_last_txn = NULL;
466 }
467
468 /* free data that's contained */
469
470 if (txn->gid != NULL)
471 {
472 pfree(txn->gid);
473 txn->gid = NULL;
474 }
475
476 if (txn->tuplecid_hash != NULL)
477 {
478 hash_destroy(txn->tuplecid_hash);
479 txn->tuplecid_hash = NULL;
480 }
481
482 if (txn->invalidations)
483 {
484 pfree(txn->invalidations);
485 txn->invalidations = NULL;
486 }
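487
488 if (txn->invalidations_distributed)
489 {
490 pfree(txn->invalidations_distributed);
491 txn->invalidations_distributed = NULL;
492 }
493
494 /* Reset the toast hash */
495 ReorderBufferToastReset(rb, txn);
496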
497 /* All changes must be deallocated */
498 Assert(txn->size == 0);
499
500 pfree(txn);
501}

References Assert, fb(), ReorderBufferTXN::gid, hash_destroy(), ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidTransactionId, pfree(), ReorderBufferToastReset(), ReorderBufferTXN::size, ReorderBufferTXN::tuplecid_hash, and ReorderBufferTXN::xid.

Referenced by ReorderBufferCleanupTXN().

◆ ReorderBufferGetCatalogChangesXacts()

TransactionId * ReorderBufferGetCatalogChangesXacts ( ReorderBuffer *rb)

Definition at line 3692 of file reorderbuffer.c.

3693{
3694 dlist_iter iter;
3695 TransactionId *xids = NULL;
3696 size_t xcnt = 0;
3697
3698 /* Quick return if the list is empty */
3699 if (dclist_count(&rb->catchange_txns) == 0)
3700 return NULL;
3701
3702 /* Initialize XID array */
3703 xids = palloc_array(TransactionId, dclist_count(&rb->catchange_txns));
3704 dclist_foreach(iter, &rb->catchange_txns)
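3705 {
3706 ReorderBufferTXN *txn = dclist_container(ReorderBufferTXN,
3707 catchange_node,
3708 iter.cur);
3709
3710 Assert(rbtxn_has_catalog_changes(txn));
3711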
3712 xids[xcnt++] = txn->xid;
3713 }
3714
3715 qsort(xids, xcnt, sizeof(TransactionId), xidComparator);
3716
3717 Assert(xcnt == dclist_count(&rb->catchange_txns));
3718 return xids;
3719}

References Assert, dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, fb(), palloc_array, qsort, rbtxn_has_catalog_changes, ReorderBufferTXN::xid, and xidComparator().

Referenced by SnapBuildSerialize().

◆ ReorderBufferGetInvalidations()

uint32 ReorderBufferGetInvalidations ( ReorderBuffer *rb,
TransactionId  xid,
SharedInvalidationMessage **  msgs 
)

Definition at line 5632 of file reorderbuffer.c.

5634{
5635 ReorderBufferTXN *txn;
5636
5637 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
5638 false);
5639
5640 if (txn == NULL)
5641 return 0;
5642
5643 *msgs = txn->invalidations;
5644
5645 return txn->ninvalidations;
5646}

References fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, and ReorderBufferTXNByXid().

Referenced by SnapBuildDistributeSnapshotAndInval().

◆ ReorderBufferGetOldestTXN()

ReorderBufferTXN * ReorderBufferGetOldestTXN ( ReorderBuffer *rb)

Definition at line 1044 of file reorderbuffer.c.

1045{
1046 ReorderBufferTXN *txn;
1047
1048 AssertTXNLsnOrder(rb);
1049
1050 if (dlist_is_empty(&rb->toplevel_by_lsn))
1051 return NULL;
1052
1053 txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
1054
1055 Assert(!rbtxn_is_known_subxact(txn));
1056 Assert(XLogRecPtrIsValid(txn->first_lsn));
1057 return txn;
1058}

References Assert, AssertTXNLsnOrder(), dlist_head_element, dlist_is_empty(), fb(), ReorderBufferTXN::first_lsn, rbtxn_is_known_subxact, and XLogRecPtrIsValid.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferGetOldestXmin()

TransactionId ReorderBufferGetOldestXmin ( ReorderBuffer *rb)

Definition at line 1072 of file reorderbuffer.c.

1073{
1074 ReorderBufferTXN *txn;
1075
1076 AssertTXNLsnOrder(rb);
1077
1078 if (dlist_is_empty(&rb->txns_by_base_snapshot_lsn))
1079 return InvalidTransactionId;
1080
1081 txn = dlist_head_element(ReorderBufferTXN, base_snapshot_node,
1082 &rb->txns_by_base_snapshot_lsn);
1083 return txn->base_snapshot->xmin;
1084}

References AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, dlist_head_element, dlist_is_empty(), fb(), InvalidTransactionId, and SnapshotData::xmin.

Referenced by SnapBuildProcessRunningXacts().

◆ ReorderBufferImmediateInvalidation()

void ReorderBufferImmediateInvalidation ( ReorderBuffer *rb,
uint32  ninvalidations,
SharedInvalidationMessage *invalidations 
)

Definition at line 3254 of file reorderbuffer.c.

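3256{
3257 bool use_subtxn = IsTransactionOrTransactionBlock();
3258 MemoryContext ccxt = CurrentMemoryContext;
3259 ResourceOwner cowner = CurrentResourceOwner;
3260 int i;
3261
3262 if (use_subtxn)
3263 BeginInternalSubTransaction("replay");
3264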
3265 /*
3266 * Force invalidations to happen outside of a valid transaction - that way
3267 * entries will just be marked as invalid without accessing the catalog.
3268 * That's advantageous because we don't need to setup the full state
3269 * necessary for catalog access.
3270 */
3271 if (use_subtxn)
3272 AbortCurrentTransaction();
3273
3274 for (i = 0; i < ninvalidations; i++)
3275 LocalExecuteInvalidationMessage(&invalidations[i]);
3276
3277 if (use_subtxn)
3278 {
3279 RollbackAndReleaseCurrentSubTransaction();
3280 MemoryContextSwitchTo(ccxt);
3281 CurrentResourceOwner = cowner;
3282 }
3283}

References AbortCurrentTransaction(), BeginInternalSubTransaction(), CurrentMemoryContext, CurrentResourceOwner, fb(), i, IsTransactionOrTransactionBlock(), LocalExecuteInvalidationMessage(), MemoryContextSwitchTo(), and RollbackAndReleaseCurrentSubTransaction().

Referenced by ReorderBufferAbort(), ReorderBufferForget(), ReorderBufferInvalidate(), and xact_decode().

◆ ReorderBufferInvalidate()

void ReorderBufferInvalidate ( ReorderBuffer *rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3223 of file reorderbuffer.c.

3224{
3225 ReorderBufferTXN *txn;
3226
3227 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3228 false);
3229
3230 /* unknown, nothing to do */
3231 if (txn == NULL)
3232 return;
3233
3234 /*
3235 * Process cache invalidation messages if there are any. Even if we're not
3236 * interested in the transaction's contents, it could have manipulated the
3237 * catalog and we need to update the caches according to that.
3238 */
3239 if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
3240 ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
3241 txn->invalidations);
3242 else
3243 Assert(txn->ninvalidations == 0);
3244}

References Assert, ReorderBufferTXN::base_snapshot, fb(), ReorderBufferTXN::invalidations, InvalidXLogRecPtr, ReorderBufferTXN::ninvalidations, ReorderBufferImmediateInvalidation(), and ReorderBufferTXNByXid().

Referenced by DecodePrepare().

◆ ReorderBufferIterCompare()

static int ReorderBufferIterCompare ( Datum  a,
Datum  b,
void *arg 
)
static

Definition at line 1261 of file reorderbuffer.c.

1262{
1263 ReorderBufferIterTXNState *state = (ReorderBufferIterTXNState *) arg;
1264 XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
1265 XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
1266
1267 if (pos_a < pos_b)
1268 return 1;
1269 else if (pos_a == pos_b)
1270 return 0;
1271 return -1;
1272}

References a, arg, b, DatumGetInt32(), and fb().

Referenced by ReorderBufferIterTXNInit().

◆ ReorderBufferIterTXNFinish()

static void ReorderBufferIterTXNFinish ( ReorderBuffer *rb,
ReorderBufferIterTXNState *state 
)
static

Definition at line 1504 of file reorderbuffer.c.

1506{
1507 int32 off;
1508
1509 for (off = 0; off < state->nr_txns; off++)
1510 {
1511 if (state->entries[off].file.vfd != -1)
1512 FileClose(state->entries[off].file.vfd);
1513 }
1514
1515 /* free memory we might have "leaked" in the last *Next call */
1516 if (!dlist_is_empty(&state->old_change))
1517 {
1518 ReorderBufferChange *change;
1519
1520 change = dlist_container(ReorderBufferChange, node,
1521 dlist_pop_head_node(&state->old_change));
1522 ReorderBufferFreeChange(rb, change, true);
1523 Assert(dlist_is_empty(&state->old_change));
1524 }
1525
1526 binaryheap_free(state->heap);
1527 pfree(state);
1528}

References Assert, binaryheap_free(), dlist_container, dlist_is_empty(), dlist_pop_head_node(), fb(), FileClose(), pfree(), and ReorderBufferFreeChange().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNInit()

static void ReorderBufferIterTXNInit ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
ReorderBufferIterTXNState *volatile iter_state 
)
static

Definition at line 1284 of file reorderbuffer.c.

1286{
1287 Size nr_txns = 0;
1288 ReorderBufferIterTXNState *state;
1289 dlist_iter cur_txn_i;
1290 int32 off;
1291
1292 *iter_state = NULL;
1293
1294 /* Check ordering of changes in the toplevel transaction. */
1295 AssertChangeLsnOrder(txn);
1296
1297 /*
1298 * Calculate the size of our heap: one element for every transaction that
1299 * contains changes. (Besides the transactions already in the reorder
1300 * buffer, we count the one we were directly passed.)
1301 */
1302 if (txn->nentries > 0)
1303 nr_txns++;
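1304
1305 dlist_foreach(cur_txn_i, &txn->subtxns)
1306 {
1307 ReorderBufferTXN *cur_txn;
1308
1309 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1310
1311 /* Check ordering of changes in this subtransaction. */
1312 AssertChangeLsnOrder(cur_txn);
1313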
1314 if (cur_txn->nentries > 0)
1315 nr_txns++;
1316 }
1317
1318 /* allocate iteration state */
1319 state = (ReorderBufferIterTXNState *)
1320 MemoryContextAllocZero(rb->context,
1321 sizeof(ReorderBufferIterTXNState) +
1322 sizeof(ReorderBufferIterTXNEntry) * nr_txns);
1323
1324 state->nr_txns = nr_txns;
1325 dlist_init(&state->old_change);
1326
1327 for (off = 0; off < state->nr_txns; off++)
1328 {
1329 state->entries[off].file.vfd = -1;
1330 state->entries[off].segno = 0;
1331 }
1332
1333 /* allocate heap */
1334 state->heap = binaryheap_allocate(state->nr_txns,
1335 ReorderBufferIterCompare,
1336 state);
1337
1338 /* Now that the state fields are initialized, it is safe to return it. */
1339 *iter_state = state;
1340
1341 /*
1342 * Now insert items into the binary heap, in an unordered fashion. (We
1343 * will run a heap assembly step at the end; this is more efficient.)
1344 */
1345
1346 off = 0;
1347
1348 /* add toplevel transaction if it contains changes */
1349 if (txn->nentries > 0)
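1350 {
1351 ReorderBufferChange *cur_change;
1352
1353 if (rbtxn_is_serialized(txn))
1354 {
1355 /* serialize remaining changes */
1356 ReorderBufferSerializeTXN(rb, txn);
1357 ReorderBufferRestoreChanges(rb, txn, &state->entries[off].file,
1358 &state->entries[off].segno);
1359 }
1360
1361 cur_change = dlist_head_element(ReorderBufferChange, node,
1362 &txn->changes);
1363
1364 state->entries[off].lsn = cur_change->lsn;
1365 state->entries[off].change = cur_change;
1366 state->entries[off].txn = txn;
1367
1368 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1369 }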
1370
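1371 /* add subtransactions if they contain changes */
1372 dlist_foreach(cur_txn_i, &txn->subtxns)
1373 {
1374 ReorderBufferTXN *cur_txn;
1375
1376 cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
1377
1378 if (cur_txn->nentries > 0)
1379 {
1380 ReorderBufferChange *cur_change;
1381
1382 if (rbtxn_is_serialized(cur_txn))
1383 {
1384 /* serialize remaining changes */
1385 ReorderBufferSerializeTXN(rb, cur_txn);
1386 ReorderBufferRestoreChanges(rb, cur_txn,
1387 &state->entries[off].file,
1388 &state->entries[off].segno);
1389 }
1390 cur_change = dlist_head_element(ReorderBufferChange, node,
1391 &cur_txn->changes);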
1392
1393 state->entries[off].lsn = cur_change->lsn;
1394 state->entries[off].change = cur_change;
1395 state->entries[off].txn = cur_txn;
1396
1397 binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
1398 }
1399 }
1400
1401 /* assemble a valid binary heap */
1402 binaryheap_build(state->heap);
1403}

References AssertChangeLsnOrder(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), ReorderBufferTXN::changes, dlist_container, dlist_foreach, dlist_head_element, dlist_init(), fb(), Int32GetDatum(), MemoryContextAllocZero(), ReorderBufferTXN::nentries, rbtxn_is_serialized, ReorderBufferIterCompare(), ReorderBufferRestoreChanges(), ReorderBufferSerializeTXN(), and ReorderBufferTXN::subtxns.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferIterTXNNext()

static ReorderBufferChange * ReorderBufferIterTXNNext ( ReorderBuffer *rb,
ReorderBufferIterTXNState *state 
)
static

Definition at line 1412 of file reorderbuffer.c.

1413{
1414 ReorderBufferChange *change;
1415 ReorderBufferIterTXNEntry *entry;
1416 int32 off;
1417
1418 /* nothing there anymore */
1419 if (binaryheap_empty(state->heap))
1420 return NULL;
1421
1422 off = DatumGetInt32(binaryheap_first(state->heap));
1423 entry = &state->entries[off];
1424
1425 /* free memory we might have "leaked" in the previous *Next call */
1426 if (!dlist_is_empty(&state->old_change))
1427 {
1428 change = dlist_container(ReorderBufferChange, node,
1429 dlist_pop_head_node(&state->old_change));
1430 ReorderBufferFreeChange(rb, change, true);
1431 Assert(dlist_is_empty(&state->old_change));
1432 }
1433
1434 change = entry->change;
1435
1436 /*
1437 * update heap with information about which transaction has the next
1438 * relevant change in LSN order
1439 */
1440
1441 /* there are in-memory changes */
1442 if (dlist_has_next(&entry->txn->changes, &entry->change->node))
1443 {
1444 dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
1445 ReorderBufferChange *next_change =
1446 dlist_container(ReorderBufferChange, node, next);
1447
1448 /* txn stays the same */
1449 state->entries[off].lsn = next_change->lsn;
1450 state->entries[off].change = next_change;
1451
1452 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1453 return change;
1454 }
1455
1456 /* try to load changes from disk */
1457 if (entry->txn->nentries != entry->txn->nentries_mem)
1458 {
1459 /*
1460 * Ugly: restoring changes will reuse *Change records, thus delete the
1461 * current one from the per-tx list and only free in the next call.
1462 */
1463 dlist_delete(&change->node);
1464 dlist_push_tail(&state->old_change, &change->node);
1465
1466 /*
1467 * Update the total bytes processed by the txn for which we are
1468 * releasing the current set of changes and restoring the new set of
1469 * changes.
1470 */
1471 rb->totalBytes += entry->txn->size;
1472 if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->file,
1473 &state->entries[off].segno))
1474 {
1475 /* successfully restored changes from disk */
1476 ReorderBufferChange *next_change =
1477 dlist_head_element(ReorderBufferChange, node,
1478 &entry->txn->changes);
1479
1480 elog(DEBUG2, "restored %u/%u changes from disk",
1481 (uint32) entry->txn->nentries_mem,
1482 (uint32) entry->txn->nentries);
1483
1484 Assert(entry->txn->nentries_mem);
1485 /* txn stays the same */
1486 state->entries[off].lsn = next_change->lsn;
1487 state->entries[off].change = next_change;
1488 binaryheap_replace_first(state->heap, Int32GetDatum(off));
1489
1490 return change;
1491 }
1492 }
1493
1494 /* ok, no changes there anymore, remove */
1495 binaryheap_remove_first(state->heap);
1496
1497 return change;
1498}

References Assert, binaryheap_empty, binaryheap_first(), binaryheap_remove_first(), binaryheap_replace_first(), ReorderBufferIterTXNEntry::change, ReorderBufferTXN::changes, DatumGetInt32(), DEBUG2, dlist_container, dlist_delete(), dlist_has_next(), dlist_head_element, dlist_is_empty(), dlist_next_node(), dlist_pop_head_node(), dlist_push_tail(), elog, fb(), ReorderBufferIterTXNEntry::file, Int32GetDatum(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, next, ReorderBufferChange::node, ReorderBufferFreeChange(), ReorderBufferRestoreChanges(), ReorderBufferTXN::size, and ReorderBufferIterTXNEntry::txn.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferLargestStreamableTopTXN()

static ReorderBufferTXN * ReorderBufferLargestStreamableTopTXN ( ReorderBuffer *rb)
static

Definition at line 3847 of file reorderbuffer.c.

3848{
3849 dlist_iter iter;
3850 Size largest_size = 0;
3851 ReorderBufferTXN *largest = NULL;
3852
3853 /* Find the largest top-level transaction having a base snapshot. */
3854 dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
3855 {
3856 ReorderBufferTXN *txn;
3857
3858 txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
3859
3860 /* must not be a subtxn */
3861 Assert(!rbtxn_is_known_subxact(txn));
3862 /* base_snapshot must be set */
3863 Assert(txn->base_snapshot != NULL);
3864
3865 /* Don't consider these kinds of transactions for eviction. */
3866 if (rbtxn_has_partial_change(txn) ||
3867 !rbtxn_has_streamable_change(txn) ||
3868 rbtxn_is_aborted(txn))
3869 continue;
3870
3871 /* Find the largest of the eviction candidates. */
3872 if ((largest == NULL || txn->total_size > largest_size) &&
3873 (txn->total_size > 0))
3874 {
3875 largest = txn;
3876 largest_size = txn->total_size;
3877 }
3878 }
3879
3880 return largest;
3881}

References Assert, ReorderBufferTXN::base_snapshot, dlist_iter::cur, dlist_container, dlist_foreach, fb(), rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_aborted, rbtxn_is_known_subxact, and ReorderBufferTXN::total_size.

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferLargestTXN()

static ReorderBufferTXN * ReorderBufferLargestTXN ( ReorderBuffer *rb)
static

Definition at line 3806 of file reorderbuffer.c.

3807{
3808 ReorderBufferTXN *largest;
3809
3810 /* Get the largest transaction from the max-heap */
3811 largest = pairingheap_container(ReorderBufferTXN, txn_node,
3812 pairingheap_first(rb->txn_heap));
3813
3814 Assert(largest);
3815 Assert(largest->size > 0);
3816 Assert(largest->size <= rb->size);
3817
3818 return largest;
3819}

References Assert, fb(), pairingheap_container, and pairingheap_first().

Referenced by ReorderBufferCheckMemoryLimit().

◆ ReorderBufferMaybeMarkTXNStreamed()

static void ReorderBufferMaybeMarkTXNStreamed ( ReorderBuffer *rb,
ReorderBufferTXN *txn 
)
static

Definition at line 2138 of file reorderbuffer.c.

2139{
2140 /*
2141 * The top-level transaction, is marked as streamed always, even if it
2142 * does not contain any changes (that is, when all the changes are in
2143 * subtransactions).
2144 *
2145 * For subtransactions, we only mark them as streamed when there are
2146 * changes in them.
2147 *
2148 * We do it this way because of aborts - we don't want to send aborts for
2149 * XIDs the downstream is not aware of. And of course, it always knows
2150 * about the top-level xact (we send the XID in all messages), but we
2151 * never stream XIDs of empty subxacts.
2152 */
2153 if (rbtxn_is_toptxn(txn) || (txn->nentries_mem != 0))
2154 txn->txn_flags |= RBTXN_IS_STREAMED;
2155}

References ReorderBufferTXN::nentries_mem, RBTXN_IS_STREAMED, rbtxn_is_toptxn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferPrepare()

void ReorderBufferPrepare ( ReorderBuffer *rb,
TransactionId  xid,
char *gid 
)

Definition at line 2961 of file reorderbuffer.c.

2963{
2964 ReorderBufferTXN *txn;
2965
2966 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
2967 false);
2968
2969 /* unknown transaction, nothing to replay */
2970 if (txn == NULL)
2971 return;
2972
2973 /*
2974 * txn must have been marked as a prepared transaction and must have
2975 * neither been skipped nor sent a prepare. Also, the prepare info must
2976 * have been updated in it by now.
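2977 */
2978 Assert((txn->txn_flags & RBTXN_PREPARE_STATUS_MASK) == RBTXN_IS_PREPARED);
2979 Assert(XLogRecPtrIsValid(txn->final_lsn));
2980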
2981 txn->gid = pstrdup(gid);
2982
2983 ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn,
2984 txn->prepare_time, txn->origin_id, txn->origin_lsn);
2985
2986 /*
2987 * Send a prepare if not already done so. This might occur if we have
2988 * detected a concurrent abort while replaying the non-streaming
2989 * transaction.
2990 */
2991 if (!rbtxn_sent_prepare(txn))
2992 {
2993 rb->prepare(rb, txn, txn->final_lsn);
2994 txn->txn_flags |= RBTXN_SENT_PREPARE;
2995 }
2996}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::gid, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, pstrdup(), RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferReplay(), ReorderBufferTXNByXid(), ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodePrepare().

◆ ReorderBufferProcessPartialChange()

static void ReorderBufferProcessPartialChange ( ReorderBuffer *rb,
ReorderBufferTXN *txn,
ReorderBufferChange *change,
bool  toast_insert 
)
static

Definition at line 741 of file reorderbuffer.c.

744{
745 ReorderBufferTXN *toptxn;
746
747 /*
748 * The partial changes need to be processed only while streaming
749 * in-progress transactions.
750 */
751 if (!ReorderBufferCanStream(rb))
752 return;
753
754 /* Get the top transaction. */
755 toptxn = rbtxn_get_toptxn(txn);
756
757 /*
758 * Indicate a partial change for toast inserts. The change will be
759 * considered as complete once we get the insert or update on the main
760 * table and we are sure that the pending toast chunks are not required
761 * anymore.
762 *
763 * If we allow streaming when there are pending toast chunks then such
764 * chunks won't be released till the insert (multi_insert) is complete and
765 * we expect the txn to have streamed all changes after streaming. This
766 * restriction is mainly to ensure the correctness of streamed
767 * transactions and it doesn't seem worth uplifting such a restriction
768 * just to allow this case because anyway we will stream the transaction
769 * once such an insert is complete.
770 */
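771 if (toast_insert)
772 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
773 else if (rbtxn_has_partial_change(toptxn) &&
774 IsInsertOrUpdate(change->action) &&
775 change->data.tp.clear_toast_afterwards)
776 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
777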
778 /*
779 * Indicate a partial change for speculative inserts. The change will be
780 * considered as complete once we get the speculative confirm or abort
781 * token.
782 */
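783 if (IsSpecInsert(change->action))
784 toptxn->txn_flags |= RBTXN_HAS_PARTIAL_CHANGE;
785 else if (rbtxn_has_partial_change(toptxn) &&
786 IsSpecConfirmOrAbort(change->action))
787 toptxn->txn_flags &= ~RBTXN_HAS_PARTIAL_CHANGE;
788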
789 /*
790 * Stream the transaction if it is serialized before and the changes are
791 * now complete in the top-level transaction.
792 *
793 * The reason for doing the streaming of such a transaction as soon as we
794 * get the complete change for it is that previously it would have reached
795 * the memory threshold and wouldn't get streamed because of incomplete
796 * changes. Delaying such transactions would increase apply lag for them.
797 */
798 if (ReorderBufferCanStartStreaming(rb) &&
799 !(rbtxn_has_partial_change(toptxn)) &&
800 rbtxn_is_serialized(txn) &&
801 rbtxn_has_streamable_change(toptxn))
802 ReorderBufferStreamTXN(rb, toptxn);
803}

References ReorderBufferChange::action, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::data, fb(), IsInsertOrUpdate, IsSpecConfirmOrAbort, IsSpecInsert, rbtxn_get_toptxn, RBTXN_HAS_PARTIAL_CHANGE, rbtxn_has_partial_change, rbtxn_has_streamable_change, rbtxn_is_serialized, ReorderBufferCanStartStreaming(), ReorderBufferCanStream(), ReorderBufferStreamTXN(), ReorderBufferChange::tp, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferQueueChange().

◆ ReorderBufferProcessTXN()

static void ReorderBufferProcessTXN ( ReorderBuffer rb,
ReorderBufferTXN txn,
XLogRecPtr  commit_lsn,
volatile Snapshot  snapshot_now,
volatile CommandId  command_id,
bool  streaming 
)
static

Definition at line 2211 of file reorderbuffer.c.

2216{
2217 bool using_subtxn;
2223 volatile bool stream_started = false;
2224 ReorderBufferTXN *volatile curtxn = NULL;
2225
2226 /* build data to be able to lookup the CommandIds of catalog tuples */
2228
2229 /* setup the initial snapshot */
2230 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2231
2232 /*
2233 * Decoding needs access to syscaches et al., which in turn use
2234 * heavyweight locks and such. Thus we need to have enough state around to
2235 * keep track of those. The easiest way is to simply use a transaction
2236 * internally. That also allows us to easily enforce that nothing writes
2237 * to the database by checking for xid assignments.
2238 *
2239 * When we're called via the SQL SRF there's already a transaction
2240 * started, so start an explicit subtransaction there.
2241 */
2243
2244 PG_TRY();
2245 {
2246 ReorderBufferChange *change;
2247 int changes_count = 0; /* used to accumulate the number of
2248 * changes */
2249
2250 if (using_subtxn)
2251 BeginInternalSubTransaction(streaming ? "stream" : "replay");
2252 else
2254
2255 /*
2256 * We only need to send begin/begin-prepare for non-streamed
2257 * transactions.
2258 */
2259 if (!streaming)
2260 {
2261 if (rbtxn_is_prepared(txn))
2262 rb->begin_prepare(rb, txn);
2263 else
2264 rb->begin(rb, txn);
2265 }
2266
2268 while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL)
2269 {
2270 Relation relation = NULL;
2271 Oid reloid;
2272
2274
2275 /*
2276 * We can't call start stream callback before processing first
2277 * change.
2278 */
2280 {
2281 if (streaming)
2282 {
2283 txn->origin_id = change->origin_id;
2284 rb->stream_start(rb, txn, change->lsn);
2285 stream_started = true;
2286 }
2287 }
2288
2289 /*
2290 * Enforce correct ordering of changes, merged from multiple
2291 * subtransactions. The changes may have the same LSN due to
2292 * MULTI_INSERT xlog records.
2293 */
2295
2296 prev_lsn = change->lsn;
2297
2298 /*
2299 * Set the current xid to detect concurrent aborts. This is
2300 * required for the cases when we decode the changes before the
2301 * COMMIT record is processed.
2302 */
2303 if (streaming || rbtxn_is_prepared(change->txn))
2304 {
2305 curtxn = change->txn;
2307 }
2308
2309 switch (change->action)
2310 {
2312
2313 /*
2314 * Confirmation for speculative insertion arrived. Simply
2315 * use as a normal record. It'll be cleaned up at the end
2316 * of INSERT processing.
2317 */
2318 if (specinsert == NULL)
2319 elog(ERROR, "invalid ordering of speculative insertion changes");
2320 Assert(specinsert->data.tp.oldtuple == NULL);
2321 change = specinsert;
2323
2324 /* intentionally fall through */
2329 Assert(snapshot_now);
2330
2331 reloid = RelidByRelfilenumber(change->data.tp.rlocator.spcOid,
2332 change->data.tp.rlocator.relNumber);
2333
2334 /*
2335 * Mapped catalog tuple without data, emitted while
2336 * catalog table was in the process of being rewritten. We
2337 * can fail to look up the relfilenumber, because the
2338 * relmapper has no "historic" view, in contrast to the
2339 * normal catalog during decoding. Thus repeated rewrites
2340 * can cause a lookup failure. That's OK because we do not
2341 * decode catalog changes anyway. Normally such tuples
2342 * would be skipped over below, but we can't identify
2343 * whether the table should be logically logged without
2344 * mapping the relfilenumber to the oid.
2345 */
2346 if (reloid == InvalidOid &&
2347 change->data.tp.newtuple == NULL &&
2348 change->data.tp.oldtuple == NULL)
2349 goto change_done;
2350 else if (reloid == InvalidOid)
2351 elog(ERROR, "could not map filenumber \"%s\" to relation OID",
2352 relpathperm(change->data.tp.rlocator,
2353 MAIN_FORKNUM).str);
2354
2355 relation = RelationIdGetRelation(reloid);
2356
2357 if (!RelationIsValid(relation))
2358 elog(ERROR, "could not open relation with OID %u (for filenumber \"%s\")",
2359 reloid,
2360 relpathperm(change->data.tp.rlocator,
2361 MAIN_FORKNUM).str);
2362
2363 if (!RelationIsLogicallyLogged(relation))
2364 goto change_done;
2365
2366 /*
2367 * Ignore temporary heaps created during DDL unless the
2368 * plugin has asked for them.
2369 */
2370 if (relation->rd_rel->relrewrite && !rb->output_rewrites)
2371 goto change_done;
2372
2373 /*
2374 * For now ignore sequence changes entirely. Most of the
2375 * time they don't log changes using records we
2376 * understand, so it doesn't make sense to handle the few
2377 * cases we do.
2378 */
2379 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
2380 goto change_done;
2381
2382 /* user-triggered change */
2383 if (!IsToastRelation(relation))
2384 {
2385 ReorderBufferToastReplace(rb, txn, relation, change);
2386 ReorderBufferApplyChange(rb, txn, relation, change,
2387 streaming);
2388
2389 /*
2390 * Only clear reassembled toast chunks if we're sure
2391 * they're not required anymore. The creator of the
2392 * tuple tells us.
2393 */
2394 if (change->data.tp.clear_toast_afterwards)
2396 }
2397 /* we're not interested in toast deletions */
2398 else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
2399 {
2400 /*
2401 * Need to reassemble the full toasted Datum in
2402 * memory, to ensure the chunks don't get reused till
2403 * we're done remove it from the list of this
2404 * transaction's changes. Otherwise it will get
2405 * freed/reused while restoring spooled data from
2406 * disk.
2407 */
2408 Assert(change->data.tp.newtuple != NULL);
2409
2410 dlist_delete(&change->node);
2411 ReorderBufferToastAppendChunk(rb, txn, relation,
2412 change);
2413 }
2414
2416
2417 /*
2418 * If speculative insertion was confirmed, the record
2419 * isn't needed anymore.
2420 */
2421 if (specinsert != NULL)
2422 {
2424 specinsert = NULL;
2425 }
2426
2427 if (RelationIsValid(relation))
2428 {
2429 RelationClose(relation);
2430 relation = NULL;
2431 }
2432 break;
2433
2435
2436 /*
2437 * Speculative insertions are dealt with by delaying the
2438 * processing of the insert until the confirmation record
2439 * arrives. For that we simply unlink the record from the
2440 * chain, so it does not get freed/reused while restoring
2441 * spooled data from disk.
2442 *
2443 * This is safe in the face of concurrent catalog changes
2444 * because the relevant relation can't be changed between
2445 * speculative insertion and confirmation due to
2446 * CheckTableNotInUse() and locking.
2447 */
2448
2449 /* clear out a pending (and thus failed) speculation */
2450 if (specinsert != NULL)
2451 {
2453 specinsert = NULL;
2454 }
2455
2456 /* and memorize the pending insertion */
2457 dlist_delete(&change->node);
2458 specinsert = change;
2459 break;
2460
2462
2463 /*
2464 * Abort for speculative insertion arrived. So cleanup the
2465 * specinsert tuple and toast hash.
2466 *
2467 * Note that we get the spec abort change for each toast
2468 * entry but we need to perform the cleanup only the first
2469 * time we get it for the main table.
2470 */
2471 if (specinsert != NULL)
2472 {
2473 /*
2474 * We must clean the toast hash before processing a
2475 * completely new tuple to avoid confusion about the
2476 * previous tuple's toast chunks.
2477 */
2480
2481 /* We don't need this record anymore. */
2483 specinsert = NULL;
2484 }
2485 break;
2486
2488 {
2489 int i;
2490 int nrelids = change->data.truncate.nrelids;
2491 int nrelations = 0;
2492 Relation *relations;
2493
2494 relations = palloc0(nrelids * sizeof(Relation));
2495 for (i = 0; i < nrelids; i++)
2496 {
2497 Oid relid = change->data.truncate.relids[i];
2498 Relation rel;
2499
2500 rel = RelationIdGetRelation(relid);
2501
2502 if (!RelationIsValid(rel))
2503 elog(ERROR, "could not open relation with OID %u", relid);
2504
2505 if (!RelationIsLogicallyLogged(rel))
2506 continue;
2507
2508 relations[nrelations++] = rel;
2509 }
2510
2511 /* Apply the truncate. */
2513 relations, change,
2514 streaming);
2515
2516 for (i = 0; i < nrelations; i++)
2517 RelationClose(relations[i]);
2518
2519 break;
2520 }
2521
2523 ReorderBufferApplyMessage(rb, txn, change, streaming);
2524 break;
2525
2527 /* Execute the invalidation messages locally */
2529 change->data.inval.invalidations);
2530 break;
2531
2533 /* get rid of the old */
2535
2536 if (snapshot_now->copied)
2537 {
2538 ReorderBufferFreeSnap(rb, snapshot_now);
2539 snapshot_now =
2541 txn, command_id);
2542 }
2543
2544 /*
2545 * Restored from disk, need to be careful not to double
2546 * free. We could introduce refcounting for that, but for
2547 * now this seems infrequent enough not to care.
2548 */
2549 else if (change->data.snapshot->copied)
2550 {
2551 snapshot_now =
2553 txn, command_id);
2554 }
2555 else
2556 {
2557 snapshot_now = change->data.snapshot;
2558 }
2559
2560 /* and continue with the new one */
2561 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2562 break;
2563
2566
2567 if (command_id < change->data.command_id)
2568 {
2569 command_id = change->data.command_id;
2570
2571 if (!snapshot_now->copied)
2572 {
2573 /* we don't use the global one anymore */
2574 snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2575 txn, command_id);
2576 }
2577
2578 snapshot_now->curcid = command_id;
2579
2581 SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
2582 }
2583
2584 break;
2585
2587 elog(ERROR, "tuplecid value in changequeue");
2588 break;
2589 }
2590
2591 /*
2592 * It is possible that the data is not sent to downstream for a
2593 * long time either because the output plugin filtered it or there
2594 * is a DDL that generates a lot of data that is not processed by
2595 * the plugin. So, in such cases, the downstream can timeout. To
2596 * avoid that we try to send a keepalive message if required.
2597 * Trying to send a keepalive message after every change has some
2598 * overhead, but testing showed there is no noticeable overhead if
2599 * we do it after every ~100 changes.
2600 */
2601#define CHANGES_THRESHOLD 100
2602
2604 {
2605 rb->update_progress_txn(rb, txn, prev_lsn);
2606 changes_count = 0;
2607 }
2608 }
2609
2610 /* speculative insertion record must be freed by now */
2612
2613 /* clean up the iterator */
2615 iterstate = NULL;
2616
2617 /*
2618 * Update total transaction count and total bytes processed by the
2619 * transaction and its subtransactions. Ensure to not count the
2620 * streamed transaction multiple times.
2621 *
2622 * Note that the statistics computation has to be done after
2623 * ReorderBufferIterTXNFinish as it releases the serialized change
2624 * which we have already accounted in ReorderBufferIterTXNNext.
2625 */
2626 if (!rbtxn_is_streamed(txn))
2627 rb->totalTxns++;
2628
2629 rb->totalBytes += txn->total_size;
2630
2631 /*
2632 * Done with current changes, send the last message for this set of
2633 * changes depending upon streaming mode.
2634 */
2635 if (streaming)
2636 {
2637 if (stream_started)
2638 {
2639 rb->stream_stop(rb, txn, prev_lsn);
2640 stream_started = false;
2641 }
2642 }
2643 else
2644 {
2645 /*
2646 * Call either PREPARE (for two-phase transactions) or COMMIT (for
2647 * regular ones).
2648 */
2649 if (rbtxn_is_prepared(txn))
2650 {
2652 rb->prepare(rb, txn, commit_lsn);
2654 }
2655 else
2656 rb->commit(rb, txn, commit_lsn);
2657 }
2658
2659 /* this is just a sanity check against bad output plugin behaviour */
2661 elog(ERROR, "output plugin used XID %u",
2663
2664 /*
2665 * Remember the command ID and snapshot for the next set of changes in
2666 * streaming mode.
2667 */
2668 if (streaming)
2669 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2670 else if (snapshot_now->copied)
2671 ReorderBufferFreeSnap(rb, snapshot_now);
2672
2673 /* cleanup */
2675
2676 /*
2677 * Aborting the current (sub-)transaction as a whole has the right
2678 * semantics. We want all locks acquired in here to be released, not
2679 * reassigned to the parent and we do not want any database access
2680 * to have persistent effects.
2681 */
2683
2684 /* make sure there's no cache pollution */
2686 {
2689 }
2690 else
2691 {
2695 }
2696
2697 if (using_subtxn)
2698 {
2701 CurrentResourceOwner = cowner;
2702 }
2703
2704 /*
2705 * We are here due to one of the four reasons: 1. Decoding an
2706 * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a
2707 * prepared txn that was (partially) streamed. 4. Decoding a committed
2708 * txn.
2709 *
2710 * For 1, we allow truncation of txn data by removing the changes
2711 * already streamed but still keeping other things like invalidations,
2712 * snapshot, and tuplecids. For 2 and 3, we indicate
2713 * ReorderBufferTruncateTXN to do more elaborate truncation of txn
2714 * data as the entire transaction has been decoded except for commit.
2715 * For 4, as the entire txn has been decoded, we can fully clean up
2716 * the TXN reorder buffer.
2717 */
2718 if (streaming || rbtxn_is_prepared(txn))
2719 {
2720 if (streaming)
2722
2724 /* Reset the CheckXidAlive */
2726 }
2727 else
2729 }
2730 PG_CATCH();
2731 {
2734
2735 /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
2736 if (iterstate)
2738
2740
2741 /*
2742 * Force cache invalidation to happen outside of a valid transaction
2743 * to prevent catalog access as we just caught an error.
2744 */
2746
2747 /* make sure there's no cache pollution */
2749 {
2752 }
2753 else
2754 {
2758 }
2759
2760 if (using_subtxn)
2761 {
2764 CurrentResourceOwner = cowner;
2765 }
2766
2767 /*
2768 * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
2769 * abort of the (sub)transaction we are streaming or preparing. We
2770 * need to do the cleanup and return gracefully on this error, see
2771 * SetupCheckXidLive.
2772 *
2773 * This error code can be thrown by one of the callbacks we call
2774 * during decoding so we need to ensure that we return gracefully only
2775 * when we are sending the data in streaming mode and the streaming is
2776 * not finished yet or when we are sending the data out on a PREPARE
2777 * during a two-phase commit.
2778 */
2779 if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK &&
2781 {
2782 /* curtxn must be set for streaming or prepared transactions */
2783 Assert(curtxn);
2784
2785 /* Cleanup the temporary error state. */
2788 errdata = NULL;
2789
2790 /* Remember the transaction is aborted. */
2792 curtxn->txn_flags |= RBTXN_IS_ABORTED;
2793
2794 /* Mark the transaction as streamed if appropriate */
2795 if (stream_started)
2797
2798 /* Reset the TXN so that it is allowed to stream remaining data. */
2799 ReorderBufferResetTXN(rb, txn, snapshot_now,
2800 command_id, prev_lsn,
2801 specinsert);
2802 }
2803 else
2804 {
2807 PG_RE_THROW();
2808 }
2809 }
2810 PG_END_TRY();
2811}

References AbortCurrentTransaction(), ReorderBufferChange::action, Assert, BeginInternalSubTransaction(), CHANGES_THRESHOLD, CHECK_FOR_INTERRUPTS, CheckXidAlive, ReorderBufferChange::clear_toast_afterwards, ReorderBufferChange::command_id, SnapshotData::copied, CopyErrorData(), SnapshotData::curcid, CurrentMemoryContext, CurrentResourceOwner, ReorderBufferChange::data, data, dlist_delete(), elog, ERROR, fb(), FlushErrorState(), FreeErrorData(), GetCurrentTransactionId(), GetCurrentTransactionIdIfAny(), i, ReorderBufferChange::inval, InvalidateSystemCaches(), ReorderBufferChange::invalidations, ReorderBufferTXN::invalidations, ReorderBufferTXN::invalidations_distributed, InvalidCommandId, InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsToastRelation(), IsTransactionOrTransactionBlock(), ReorderBufferChange::lsn, MAIN_FORKNUM, MemoryContextSwitchTo(), ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::ninvalidations_distributed, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::origin_id, ReorderBufferTXN::origin_id, palloc0(), PG_CATCH, PG_END_TRY, pg_fallthrough, PG_RE_THROW, PG_TRY, rbtxn_distr_inval_overflowed, RBTXN_IS_ABORTED, rbtxn_is_committed, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, RelationData::rd_rel, RelationClose(), RelationIdGetRelation(), RelationIsLogicallyLogged, RelationIsValid, RelidByRelfilenumber(), ReorderBufferChange::relids, RelFileLocator::relNumber, relpathperm, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, 
REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferApplyChange(), ReorderBufferApplyMessage(), ReorderBufferApplyTruncate(), ReorderBufferBuildTupleCidHash(), ReorderBufferCleanupTXN(), ReorderBufferCopySnap(), ReorderBufferExecuteInvalidations(), ReorderBufferFreeChange(), ReorderBufferFreeSnap(), ReorderBufferIterTXNFinish(), ReorderBufferIterTXNInit(), ReorderBufferIterTXNNext(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferResetTXN(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastAppendChunk(), ReorderBufferToastReplace(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), ReorderBufferChange::rlocator, RollbackAndReleaseCurrentSubTransaction(), SetupCheckXidLive(), SetupHistoricSnapshot(), ReorderBufferChange::snapshot, RelFileLocator::spcOid, StartTransactionCommand(), TeardownHistoricSnapshot(), ReorderBufferTXN::total_size, ReorderBufferChange::tp, ReorderBufferChange::truncate, ReorderBufferTXN::tuplecid_hash, ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by ReorderBufferReplay(), and ReorderBufferStreamTXN().

◆ ReorderBufferProcessXid()

void ReorderBufferProcessXid ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3296 of file reorderbuffer.c.

3297{
3298 /* Many records carry no xid; the InvalidTransactionId check is centralized here so such records are simply ignored. */
3299 if (xid != InvalidTransactionId)
3300 ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); /* result discarded; NOTE(review): presumably called only to make rb track xid as of lsn -- confirm flag meanings against ReorderBufferTXNByXid */
3301}

References fb(), InvalidTransactionId, and ReorderBufferTXNByXid().

Referenced by heap2_decode(), heap_decode(), LogicalDecodingProcessRecord(), logicalmsg_decode(), standby_decode(), xact_decode(), and xlog_decode().

◆ ReorderBufferQueueChange()

void ReorderBufferQueueChange ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  lsn,
ReorderBufferChange change,
bool  toast_insert 
)

Definition at line 810 of file reorderbuffer.c.

812{
813 ReorderBufferTXN *txn;
814
815 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
816
817 /*
818 * If we have detected that the transaction is aborted while streaming the
819 * previous changes or by checking its CLOG, there is no point in
820 * collecting further changes for it.
821 */
822 if (rbtxn_is_aborted(txn))
823 {
824 /*
825 * We don't need to update memory accounting for this change as we
826 * have not added it to the queue yet.
827 */
828 ReorderBufferFreeChange(rb, change, false);
829 return;
830 }
831
832 /*
833 * The changes that are sent downstream are considered streamable. We
834 * remember such transactions so that only those will later be considered
835 * for streaming.
836 */
837 if (change->action == REORDER_BUFFER_CHANGE_INSERT ||
843 {
844 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
845
847 }
848
849 change->lsn = lsn;
850 change->txn = txn;
851
853 dlist_push_tail(&txn->changes, &change->node);
854 txn->nentries++;
855 txn->nentries_mem++;
856
857 /* update memory accounting information */
860
861 /* process partial change */
863
864 /* check the memory limits and evict something if needed */
866}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_push_tail(), fb(), ReorderBufferChange::lsn, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_get_toptxn, RBTXN_HAS_STREAMABLE_CHANGE, rbtxn_is_aborted, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferCheckMemoryLimit(), ReorderBufferFreeChange(), ReorderBufferProcessPartialChange(), ReorderBufferTXNByXid(), ReorderBufferChange::txn, ReorderBufferTXN::txn_flags, and XLogRecPtrIsValid.

Referenced by DecodeDelete(), DecodeInsert(), DecodeMultiInsert(), DecodeSpecConfirm(), DecodeTruncate(), DecodeUpdate(), ReorderBufferAddNewCommandId(), ReorderBufferAddSnapshot(), ReorderBufferQueueInvalidations(), and ReorderBufferQueueMessage().

◆ ReorderBufferQueueInvalidations()

◆ ReorderBufferQueueMessage()

void ReorderBufferQueueMessage ( ReorderBuffer rb,
TransactionId  xid,
Snapshot  snap,
XLogRecPtr  lsn,
bool  transactional,
const char prefix,
Size  message_size,
const char message 
)

Definition at line 873 of file reorderbuffer.c.

877{
878 if (transactional)
879 {
880 MemoryContext oldcontext;
881 ReorderBufferChange *change;
882
884
885 /*
886 * We don't expect snapshots for transactional changes - we'll use the
887 * snapshot derived later during apply (unless the change gets
888 * skipped).
889 */
890 Assert(!snap);
891
892 oldcontext = MemoryContextSwitchTo(rb->context);
893
896 change->data.msg.prefix = pstrdup(prefix);
897 change->data.msg.message_size = message_size;
898 change->data.msg.message = palloc(message_size);
899 memcpy(change->data.msg.message, message, message_size);
900
901 ReorderBufferQueueChange(rb, xid, lsn, change, false);
902
903 MemoryContextSwitchTo(oldcontext);
904 }
905 else
906 {
907 ReorderBufferTXN *txn = NULL;
908 volatile Snapshot snapshot_now = snap;
909
910 /* Non-transactional changes require a valid snapshot. */
911 Assert(snapshot_now);
912
913 if (xid != InvalidTransactionId)
914 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
915
916 /* setup snapshot to allow catalog access */
917 SetupHistoricSnapshot(snapshot_now, NULL);
918 PG_TRY();
919 {
920 rb->message(rb, txn, lsn, false, prefix, message_size, message);
921
923 }
924 PG_CATCH();
925 {
927 PG_RE_THROW();
928 }
929 PG_END_TRY();
930 }
931}

References ReorderBufferChange::action, Assert, ReorderBufferChange::data, fb(), InvalidTransactionId, MemoryContextSwitchTo(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, palloc(), PG_CATCH, PG_END_TRY, PG_RE_THROW, PG_TRY, ReorderBufferChange::prefix, pstrdup(), REORDER_BUFFER_CHANGE_MESSAGE, ReorderBufferAllocChange(), ReorderBufferQueueChange(), ReorderBufferTXNByXid(), SetupHistoricSnapshot(), and TeardownHistoricSnapshot().

Referenced by logicalmsg_decode().

◆ ReorderBufferRememberPrepareInfo()

bool ReorderBufferRememberPrepareInfo ( ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  prepare_lsn,
XLogRecPtr  end_lsn,
TimestampTz  prepare_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)

Definition at line 2908 of file reorderbuffer.c.

2912{
2913 ReorderBufferTXN *txn;
2914
2915 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2916
2917 /* unknown transaction, nothing to do */
2918 if (txn == NULL)
2919 return false;
2920
2921 /*
2922 * Remember the prepare information to be later used by commit prepared in
2923 * case we skip doing prepare.
2924 */
2925 txn->final_lsn = prepare_lsn;
2926 txn->end_lsn = end_lsn;
2927 txn->prepare_time = prepare_time;
2928 txn->origin_id = origin_id;
2929 txn->origin_lsn = origin_lsn;
2930
2931 /* Mark this transaction as a prepared transaction */
2934
2935 return true;
2936}

References Assert, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, InvalidXLogRecPtr, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, ReorderBufferTXN::prepare_time, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferReplay()

static void ReorderBufferReplay ( ReorderBufferTXN txn,
ReorderBuffer rb,
TransactionId  xid,
XLogRecPtr  commit_lsn,
XLogRecPtr  end_lsn,
TimestampTz  commit_time,
ReplOriginId  origin_id,
XLogRecPtr  origin_lsn 
)
static

Definition at line 2824 of file reorderbuffer.c.

2829{
2830 Snapshot snapshot_now;
2831 CommandId command_id = FirstCommandId;
2832
2833 txn->final_lsn = commit_lsn;
2834 txn->end_lsn = end_lsn;
2835 txn->commit_time = commit_time;
2836 txn->origin_id = origin_id;
2837 txn->origin_lsn = origin_lsn;
2838
2839 /*
2840 * If the transaction was (partially) streamed, we need to commit it in a
2841 * 'streamed' way. That is, we first stream the remaining part of the
2842 * transaction, and then invoke stream_commit message.
2843 *
2844 * Called after everything (origin ID, LSN, ...) is stored in the
2845 * transaction to avoid passing that information directly.
2846 */
2847 if (rbtxn_is_streamed(txn))
2848 {
2850 return;
2851 }
2852
2853 /*
2854 * If this transaction has no snapshot, it didn't make any changes to the
2855 * database, so there's nothing to decode. Note that
2856 * ReorderBufferCommitChild will have transferred any snapshots from
2857 * subtransactions if there were any.
2858 */
2859 if (txn->base_snapshot == NULL)
2860 {
2861 Assert(txn->ninvalidations == 0);
2862
2863 /*
2864 * Removing this txn before a commit might result in the computation
2865 * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts.
2866 */
2867 if (!rbtxn_is_prepared(txn))
2869 return;
2870 }
2871
2872 snapshot_now = txn->base_snapshot;
2873
2874 /* Process and send the changes to output plugin. */
2875 ReorderBufferProcessTXN(rb, txn, commit_lsn, snapshot_now,
2876 command_id, false);
2877}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::commit_time, ReorderBufferTXN::end_lsn, fb(), ReorderBufferTXN::final_lsn, FirstCommandId, ReorderBufferTXN::ninvalidations, ReorderBufferTXN::origin_id, ReorderBufferTXN::origin_lsn, rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferCleanupTXN(), ReorderBufferProcessTXN(), and ReorderBufferStreamCommit().

Referenced by ReorderBufferCommit(), ReorderBufferFinishPrepared(), and ReorderBufferPrepare().

◆ ReorderBufferResetTXN()

static void ReorderBufferResetTXN ( ReorderBuffer rb,
ReorderBufferTXN txn,
Snapshot  snapshot_now,
CommandId  command_id,
XLogRecPtr  last_lsn,
ReorderBufferChange specinsert 
)
static

Definition at line 2165 of file reorderbuffer.c.

2170{
2171 /* Discard the changes that we just streamed */
2173
2174 /* Free all resources allocated for toast reconstruction */
2176
2177 /* Return the spec insert change if it is not NULL */
2178 if (specinsert != NULL)
2179 {
2181 specinsert = NULL;
2182 }
2183
2184 /*
2185 * For the streaming case, stop the stream and remember the command ID and
2186 * snapshot for the streaming run.
2187 */
2188 if (rbtxn_is_streamed(txn))
2189 {
2190 rb->stream_stop(rb, txn, last_lsn);
2191 ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
2192 }
2193
2194 /* All changes must be deallocated */
2195 Assert(txn->size == 0);
2196}

References Assert, fb(), rbtxn_is_prepared, rbtxn_is_streamed, ReorderBufferFreeChange(), ReorderBufferSaveTXNSnapshot(), ReorderBufferToastReset(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::size.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferRestoreChange()

static void ReorderBufferRestoreChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
char data 
)
static

Definition at line 4688 of file reorderbuffer.c.

4690{
4692 ReorderBufferChange *change;
4693
4694 ondisk = (ReorderBufferDiskChange *) data;
4695
4696 change = ReorderBufferAllocChange(rb);
4697
4698 /* copy static part */
4699 memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
4700
4701 data += sizeof(ReorderBufferDiskChange);
4702
4703 /* restore individual stuff */
4704 switch (change->action)
4705 {
4706 /* fall through these, they're all similar enough */
4711 if (change->data.tp.oldtuple)
4712 {
4713 uint32 tuplelen = ((HeapTuple) data)->t_len;
4714
4715 change->data.tp.oldtuple =
4717
4718 /* restore ->tuple */
4719 memcpy(change->data.tp.oldtuple, data,
4720 sizeof(HeapTupleData));
4721 data += sizeof(HeapTupleData);
4722
4723 /* reset t_data pointer into the new tuplebuf */
4724 change->data.tp.oldtuple->t_data =
4725 (HeapTupleHeader) ((char *) change->data.tp.oldtuple + HEAPTUPLESIZE);
4726
4727 /* restore tuple data itself */
4729 data += tuplelen;
4730 }
4731
4732 if (change->data.tp.newtuple)
4733 {
4734 /* here, data might not be suitably aligned! */
4736
4738 sizeof(uint32));
4739
4740 change->data.tp.newtuple =
4742
4743 /* restore ->tuple */
4744 memcpy(change->data.tp.newtuple, data,
4745 sizeof(HeapTupleData));
4746 data += sizeof(HeapTupleData);
4747
4748 /* reset t_data pointer into the new tuplebuf */
4749 change->data.tp.newtuple->t_data =
4750 (HeapTupleHeader) ((char *) change->data.tp.newtuple + HEAPTUPLESIZE);
4751
4752 /* restore tuple data itself */
4754 data += tuplelen;
4755 }
4756
4757 break;
4759 {
4760 Size prefix_size;
4761
4762 /* read prefix */
4763 memcpy(&prefix_size, data, sizeof(Size));
4764 data += sizeof(Size);
4765 change->data.msg.prefix = MemoryContextAlloc(rb->context,
4766 prefix_size);
4767 memcpy(change->data.msg.prefix, data, prefix_size);
4768 Assert(change->data.msg.prefix[prefix_size - 1] == '\0');
4769 data += prefix_size;
4770
4771 /* read the message */
4772 memcpy(&change->data.msg.message_size, data, sizeof(Size));
4773 data += sizeof(Size);
4774 change->data.msg.message = MemoryContextAlloc(rb->context,
4775 change->data.msg.message_size);
4776 memcpy(change->data.msg.message, data,
4777 change->data.msg.message_size);
4778 data += change->data.msg.message_size;
4779
4780 break;
4781 }
4783 {
4785 change->data.inval.ninvalidations;
4786
4787 change->data.inval.invalidations =
4788 MemoryContextAlloc(rb->context, inval_size);
4789
4790 /* read the message */
4792
4793 break;
4794 }
4796 {
4799 Size size;
4800
4801 oldsnap = (Snapshot) data;
4802
4803 size = sizeof(SnapshotData) +
4804 sizeof(TransactionId) * oldsnap->xcnt +
4805 sizeof(TransactionId) * (oldsnap->subxcnt + 0);
4806
4807 change->data.snapshot = MemoryContextAllocZero(rb->context, size);
4808
4809 newsnap = change->data.snapshot;
4810
4811 memcpy(newsnap, data, size);
4812 newsnap->xip = (TransactionId *)
4813 (((char *) newsnap) + sizeof(SnapshotData));
4814 newsnap->subxip = newsnap->xip + newsnap->xcnt;
4815 newsnap->copied = true;
4816 break;
4817 }
4818 /* the base struct contains all the data, easy peasy */
4820 {
4821 Oid *relids;
4822
4823 relids = ReorderBufferAllocRelids(rb, change->data.truncate.nrelids);
4824 memcpy(relids, data, change->data.truncate.nrelids * sizeof(Oid));
4825 change->data.truncate.relids = relids;
4826
4827 break;
4828 }
4833 break;
4834 }
4835
4836 dlist_push_tail(&txn->changes, &change->node);
4837 txn->nentries_mem++;
4838
4839 /*
4840 * Update memory accounting for the restored change. We need to do this
4841 * although we don't check the memory limit when restoring the changes in
4842 * this branch (we only do that when initially queueing the changes after
4843 * decoding), because we will release the changes later, and that will
4844 * update the accounting too (subtracting the size from the counters). And
4845 * we don't want to underflow there.
4846 */
4848 ReorderBufferChangeSize(change));
4849}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, ReorderBufferTXN::changes, ReorderBufferChange::data, data, dlist_push_tail(), fb(), HEAPTUPLESIZE, ReorderBufferChange::inval, ReorderBufferChange::invalidations, MemoryContextAlloc(), MemoryContextAllocZero(), ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferTXN::nentries_mem, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::node, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferAllocChange(), ReorderBufferAllocRelids(), ReorderBufferAllocTupleBuf(), ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SizeofHeapTupleHeader, ReorderBufferChange::snapshot, HeapTupleData::t_data, ReorderBufferChange::tp, ReorderBufferChange::truncate, and SnapshotData::xcnt.

Referenced by ReorderBufferRestoreChanges().

◆ ReorderBufferRestoreChanges()

static Size ReorderBufferRestoreChanges ( ReorderBuffer rb,
ReorderBufferTXN txn,
TXNEntryFile file,
XLogSegNo segno 
)
static

Definition at line 4545 of file reorderbuffer.c.

4547{
4548 Size restored = 0;
4551 File *fd = &file->vfd;
4552
4555
4556 /* free current entries, so we have memory for more */
4558 {
4561
4562 dlist_delete(&cleanup->node);
4564 }
4565 txn->nentries_mem = 0;
4567
4569
4570 while (restored < max_changes_in_memory && *segno <= last_segno)
4571 {
4572 int readBytes;
4574
4576
4577 if (*fd == -1)
4578 {
4579 char path[MAXPGPATH];
4580
4581 /* first time in */
4582 if (*segno == 0)
4583 XLByteToSeg(txn->first_lsn, *segno, wal_segment_size);
4584
4585 Assert(*segno != 0 || dlist_is_empty(&txn->changes));
4586
4587 /*
4588 * No need to care about TLIs here, only used during a single run,
4589 * so each LSN only maps to a specific WAL record.
4590 */
4592 *segno);
4593
4595
4596 /* No harm in resetting the offset even in case of failure */
4597 file->curOffset = 0;
4598
4599 if (*fd < 0 && errno == ENOENT)
4600 {
4601 *fd = -1;
4602 (*segno)++;
4603 continue;
4604 }
4605 else if (*fd < 0)
4606 ereport(ERROR,
4608 errmsg("could not open file \"%s\": %m",
4609 path)));
4610 }
4611
4612 /*
4613 * Read the statically sized part of a change which has information
4614 * about the total size. If we couldn't read a record, we're at the
4615 * end of this file.
4616 */
4618 readBytes = FileRead(file->vfd, rb->outbuf,
4621
4622 /* eof */
4623 if (readBytes == 0)
4624 {
4625 FileClose(*fd);
4626 *fd = -1;
4627 (*segno)++;
4628 continue;
4629 }
4630 else if (readBytes < 0)
4631 ereport(ERROR,
4633 errmsg("could not read from reorderbuffer spill file: %m")));
4634 else if (readBytes != sizeof(ReorderBufferDiskChange))
4635 ereport(ERROR,
4637 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4638 readBytes,
4639 (uint32) sizeof(ReorderBufferDiskChange))));
4640
4641 file->curOffset += readBytes;
4642
4643 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4644
4646 sizeof(ReorderBufferDiskChange) + ondisk->size);
4647 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4648
4649 readBytes = FileRead(file->vfd,
4650 rb->outbuf + sizeof(ReorderBufferDiskChange),
4651 ondisk->size - sizeof(ReorderBufferDiskChange),
4652 file->curOffset,
4654
4655 if (readBytes < 0)
4656 ereport(ERROR,
4658 errmsg("could not read from reorderbuffer spill file: %m")));
4659 else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
4660 ereport(ERROR,
4662 errmsg("could not read from reorderbuffer spill file: read %d instead of %u bytes",
4663 readBytes,
4664 (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
4665
4666 file->curOffset += readBytes;
4667
4668 /*
4669 * ok, read a full change from disk, now restore it into proper
4670 * in-memory format
4671 */
4672 ReorderBufferRestoreChange(rb, txn, rb->outbuf);
4673 restored++;
4674 }
4675
4676 return restored;
4677}

References Assert, ReorderBufferTXN::changes, CHECK_FOR_INTERRUPTS, cleanup(), TXNEntryFile::curOffset, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_is_empty(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), FileClose(), FileRead(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, max_changes_in_memory, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, PathNameOpenFile(), PG_BINARY, ReorderBufferFreeChange(), ReorderBufferRestoreChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, TXNEntryFile::vfd, wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferIterTXNInit(), and ReorderBufferIterTXNNext().

◆ ReorderBufferRestoreCleanup()

static void ReorderBufferRestoreCleanup ( ReorderBuffer rb,
ReorderBufferTXN txn 
)
static

Definition at line 4855 of file reorderbuffer.c.

4856{
4857 XLogSegNo first;
4858 XLogSegNo cur;
4859 XLogSegNo last;
4860
4863
4866
4867 /* iterate over all possible filenames, and delete them */
4868 for (cur = first; cur <= last; cur++)
4869 {
4870 char path[MAXPGPATH];
4871
4873 if (unlink(path) != 0 && errno != ENOENT)
4874 ereport(ERROR,
4876 errmsg("could not remove file \"%s\": %m", path)));
4877 }
4878}

References Assert, cur, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), ReorderBufferTXN::final_lsn, ReorderBufferTXN::first_lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferSerializedPath(), wal_segment_size, ReorderBufferTXN::xid, XLByteToSeg, and XLogRecPtrIsValid.

Referenced by ReorderBufferCleanupTXN(), and ReorderBufferTruncateTXN().

◆ ReorderBufferSaveTXNSnapshot()

static void ReorderBufferSaveTXNSnapshot ( ReorderBuffer rb,
ReorderBufferTXN txn,
Snapshot  snapshot_now,
CommandId  command_id 
)
inlinestatic

Definition at line 2120 of file reorderbuffer.c.

2122{ /* Record command_id and snapshot_now on txn. */
2123 txn->command_id = command_id;
2124
2125 /* Store the snapshot as-is when it is already marked copied; otherwise store a copy made by ReorderBufferCopySnap. */
2126 if (snapshot_now->copied)
2127 txn->snapshot_now = snapshot_now;
2128 else
2129 txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
2130 txn, command_id);
2131}

References ReorderBufferTXN::command_id, SnapshotData::copied, fb(), ReorderBufferCopySnap(), and ReorderBufferTXN::snapshot_now.

Referenced by ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferSerializeChange()

static void ReorderBufferSerializeChange ( ReorderBuffer rb,
ReorderBufferTXN txn,
int  fd,
ReorderBufferChange change 
)
static

Definition at line 4093 of file reorderbuffer.c.

4095{
4098
4100
4101 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4102 memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
4103
4104 switch (change->action)
4105 {
4106 /* fall through these, they're all similar enough */
4111 {
4112 char *data;
4114 newtup;
4115 Size oldlen = 0;
4116 Size newlen = 0;
4117
4118 oldtup = change->data.tp.oldtuple;
4119 newtup = change->data.tp.newtuple;
4120
4121 if (oldtup)
4122 {
4123 sz += sizeof(HeapTupleData);
4124 oldlen = oldtup->t_len;
4125 sz += oldlen;
4126 }
4127
4128 if (newtup)
4129 {
4130 sz += sizeof(HeapTupleData);
4131 newlen = newtup->t_len;
4132 sz += newlen;
4133 }
4134
4135 /* make sure we have enough space */
4137
4138 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4139 /* might have been reallocated above */
4140 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4141
4142 if (oldlen)
4143 {
4144 memcpy(data, oldtup, sizeof(HeapTupleData));
4145 data += sizeof(HeapTupleData);
4146
4147 memcpy(data, oldtup->t_data, oldlen);
4148 data += oldlen;
4149 }
4150
4151 if (newlen)
4152 {
4153 memcpy(data, newtup, sizeof(HeapTupleData));
4154 data += sizeof(HeapTupleData);
4155
4156 memcpy(data, newtup->t_data, newlen);
4157 data += newlen;
4158 }
4159 break;
4160 }
4162 {
4163 char *data;
4164 Size prefix_size = strlen(change->data.msg.prefix) + 1;
4165
4166 sz += prefix_size + change->data.msg.message_size +
4167 sizeof(Size) + sizeof(Size);
4169
4170 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4171
4172 /* might have been reallocated above */
4173 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4174
4175 /* write the prefix including the size */
4176 memcpy(data, &prefix_size, sizeof(Size));
4177 data += sizeof(Size);
4178 memcpy(data, change->data.msg.prefix,
4179 prefix_size);
4180 data += prefix_size;
4181
4182 /* write the message including the size */
4183 memcpy(data, &change->data.msg.message_size, sizeof(Size));
4184 data += sizeof(Size);
4185 memcpy(data, change->data.msg.message,
4186 change->data.msg.message_size);
4187 data += change->data.msg.message_size;
4188
4189 break;
4190 }
4192 {
4193 char *data;
4195 change->data.inval.ninvalidations;
4196
4197 sz += inval_size;
4198
4200 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4201
4202 /* might have been reallocated above */
4203 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4205 data += inval_size;
4206
4207 break;
4208 }
4210 {
4211 Snapshot snap;
4212 char *data;
4213
4214 snap = change->data.snapshot;
4215
4216 sz += sizeof(SnapshotData) +
4217 sizeof(TransactionId) * snap->xcnt +
4218 sizeof(TransactionId) * snap->subxcnt;
4219
4220 /* make sure we have enough space */
4222 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4223 /* might have been reallocated above */
4224 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4225
4226 memcpy(data, snap, sizeof(SnapshotData));
4227 data += sizeof(SnapshotData);
4228
4229 if (snap->xcnt)
4230 {
4231 memcpy(data, snap->xip,
4232 sizeof(TransactionId) * snap->xcnt);
4233 data += sizeof(TransactionId) * snap->xcnt;
4234 }
4235
4236 if (snap->subxcnt)
4237 {
4238 memcpy(data, snap->subxip,
4239 sizeof(TransactionId) * snap->subxcnt);
4240 data += sizeof(TransactionId) * snap->subxcnt;
4241 }
4242 break;
4243 }
4245 {
4246 Size size;
4247 char *data;
4248
4249 /* account for the OIDs of truncated relations */
4250 size = sizeof(Oid) * change->data.truncate.nrelids;
4251 sz += size;
4252
4253 /* make sure we have enough space */
4255
4256 data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
4257 /* might have been reallocated above */
4258 ondisk = (ReorderBufferDiskChange *) rb->outbuf;
4259
4260 memcpy(data, change->data.truncate.relids, size);
4261 data += size;
4262
4263 break;
4264 }
4269 /* ReorderBufferChange contains everything important */
4270 break;
4271 }
4272
4273 ondisk->size = sz;
4274
4275 errno = 0;
4277 if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
4278 {
4279 int save_errno = errno;
4280
4282
4283 /* if write didn't set errno, assume problem is no disk space */
4285 ereport(ERROR,
4287 errmsg("could not write to data file for XID %u: %m",
4288 txn->xid)));
4289 }
4291
4292 /*
4293 * Keep the transaction's final_lsn up to date with each change we send to
4294 * disk, so that ReorderBufferRestoreCleanup works correctly. (We used to
4295 * only do this on commit and abort records, but that doesn't work if a
4296 * system crash leaves a transaction without its abort record).
4297 *
4298 * Make sure not to move it backwards.
4299 */
4300 if (txn->final_lsn < change->lsn)
4301 txn->final_lsn = change->lsn;
4302
4303 Assert(ondisk->change.action == change->action);
4304}

References ReorderBufferChange::action, Assert, ReorderBufferDiskChange::change, CloseTransientFile(), ReorderBufferChange::data, data, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), ReorderBufferTXN::final_lsn, ReorderBufferChange::inval, ReorderBufferChange::invalidations, ReorderBufferChange::lsn, ReorderBufferChange::message, ReorderBufferChange::message_size, ReorderBufferChange::msg, ReorderBufferChange::newtuple, ReorderBufferChange::ninvalidations, ReorderBufferChange::nrelids, ReorderBufferChange::oldtuple, pgstat_report_wait_end(), pgstat_report_wait_start(), ReorderBufferChange::prefix, ReorderBufferChange::relids, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_TRUNCATE, REORDER_BUFFER_CHANGE_UPDATE, ReorderBufferSerializeReserve(), ReorderBufferDiskChange::size, ReorderBufferChange::snapshot, HeapTupleData::t_len, ReorderBufferChange::tp, ReorderBufferChange::truncate, write, SnapshotData::xcnt, and ReorderBufferTXN::xid.

Referenced by ReorderBufferSerializeTXN().

◆ ReorderBufferSerializedPath()

◆ ReorderBufferSerializeReserve()

static void ReorderBufferSerializeReserve ( ReorderBuffer * rb,
Size  sz 
)
static

Definition at line 3773 of file reorderbuffer.c.

/*
 * ReorderBufferSerializeReserve
 *		Make sure rb->outbuf is at least sz bytes large.
 *
 * The buffer is allocated in rb->context the first time through (while
 * rb->outbufsize is still zero) and enlarged with repalloc() whenever the
 * request exceeds the current size.  It is never shrunk here; a request
 * that already fits leaves the buffer untouched.
 */
3774{
3775 if (!rb->outbufsize)
3776 {
3777 rb->outbuf = MemoryContextAlloc(rb->context, sz);
3778 rb->outbufsize = sz;
3779 }
3780 else if (rb->outbufsize < sz)
3781 {
3782 rb->outbuf = repalloc(rb->outbuf, sz);
3783 rb->outbufsize = sz;
3784 }
3785}

References fb(), MemoryContextAlloc(), and repalloc().

Referenced by ReorderBufferRestoreChanges(), and ReorderBufferSerializeChange().

◆ ReorderBufferSerializeTXN()

static void ReorderBufferSerializeTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 3998 of file reorderbuffer.c.

/*
 * ReorderBufferSerializeTXN
 *		Spill the changes txn currently holds in memory to disk.
 *
 * Every change is handed to ReorderBufferSerializeChange(), unlinked from
 * its list and freed.  'spilled' counts them and is asserted to equal
 * txn->nentries_mem before that counter is reset to zero.  The transaction
 * size sampled on entry is what gets passed to
 * ReorderBufferChangeMemoryUpdate() and, if anything was spilled, added to
 * rb->spillBytes.
 *
 * A spill file is opened with OpenTransientFile() whenever no file is open
 * yet (fd == -1) or the second half of the condition at line 4030 holds;
 * failure to open raises an ERROR.
 *
 * NOTE(review): this listing is an extract with several source lines
 * missing (e.g. 4000-4003, 4011-4016, 4020, 4031, 4036-4038, 4086), among
 * them the loop headers and the file close calls.  Consult the full source
 * before relying on the elided statements.
 */
3999{
4002 int fd = -1;
4004 Size spilled = 0;
4005 Size size = txn->size;
4006
4007 elog(DEBUG2, "spill %u changes in XID %u to disk",
4008 (uint32) txn->nentries_mem, txn->xid);
4009
4010 /* do the same to all child TXs */
4012 {
4014
4017 }
4018
4019 /* serialize changestream */
4021 {
4022 ReorderBufferChange *change;
4023
4024 change = dlist_container(ReorderBufferChange, node, change_i.cur);
4025
4026 /*
4027 * store in segment in which it belongs by start lsn, don't split over
4028 * multiple segments tho
4029 */
4030 if (fd == -1 ||
4032 {
4033 char path[MAXPGPATH];
4034
4035 if (fd != -1)
4037
4039
4040 /*
4041 * No need to care about TLIs here, only used during a single run,
4042 * so each LSN only maps to a specific WAL record.
4043 */
4045 curOpenSegNo);
4046
4047 /* open segment, create it if necessary */
4048 fd = OpenTransientFile(path,
4050
4051 if (fd < 0)
4052 ereport(ERROR,
4054 errmsg("could not open file \"%s\": %m", path)));
4055 }
4056
/* write the change out, then drop the in-memory copy */
4057 ReorderBufferSerializeChange(rb, txn, fd, change);
4058 dlist_delete(&change->node);
4059 ReorderBufferFreeChange(rb, change, false);
4060
4061 spilled++;
4062 }
4063
4064 /* Update the memory counter */
4065 ReorderBufferChangeMemoryUpdate(rb, NULL, txn, false, size);
4066
4067 /* update the statistics iff we have spilled anything */
4068 if (spilled)
4069 {
4070 rb->spillCount += 1;
4071 rb->spillBytes += size;
4072
4073 /* don't consider already serialized transactions */
4074 rb->spillTxns += (rbtxn_is_serialized(txn) || rbtxn_is_serialized_clear(txn)) ? 0 : 1;
4075
4076 /* update the decoding stats */
4078 }
4079
4080 Assert(spilled == txn->nentries_mem);
4082 txn->nentries_mem = 0;
4084
4085 if (fd != -1)
4087}

References Assert, ReorderBufferTXN::changes, CloseTransientFile(), DEBUG2, dlist_container, dlist_delete(), dlist_foreach, dlist_foreach_modify, dlist_is_empty(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), ReorderBufferChange::lsn, MAXPGPATH, MyReplicationSlot, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, OpenTransientFile(), PG_BINARY, RBTXN_IS_SERIALIZED, rbtxn_is_serialized, rbtxn_is_serialized_clear, ReorderBufferChangeMemoryUpdate(), ReorderBufferFreeChange(), ReorderBufferSerializeChange(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReorderBufferTXN::size, ReorderBufferTXN::subtxns, ReorderBufferTXN::txn_flags, UpdateDecodingStats(), wal_segment_size, ReorderBufferTXN::xid, XLByteInSeg, and XLByteToSeg.

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferIterTXNInit(), and ReorderBufferSerializeTXN().

◆ ReorderBufferSetBaseSnapshot()

void ReorderBufferSetBaseSnapshot ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn,
Snapshot  snap 
)

Definition at line 3327 of file reorderbuffer.c.

/*
 * ReorderBufferSetBaseSnapshot
 *		Install snap as the base snapshot of transaction xid.
 *
 * The transaction is looked up with creation enabled.  If it turns out to
 * be a known subtransaction, the snapshot is attached to its top-level
 * transaction instead.  The target must not already have a base snapshot
 * (asserted), and snap must not be NULL (asserted).  The transaction is
 * appended to rb->txns_by_base_snapshot_lsn with lsn recorded as its
 * base_snapshot_lsn.
 *
 * NOTE(review): listing line 3349 is absent from this extract; the
 * References list suggests it is an AssertTXNLsnOrder() call -- confirm.
 */
3329{
3330 ReorderBufferTXN *txn;
3331 bool is_new;
3332
3333 Assert(snap != NULL);
3334
3335 /*
3336 * Fetch the transaction to operate on. If we know it's a subtransaction,
3337 * operate on its top-level transaction instead.
3338 */
3339 txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
3340 if (rbtxn_is_known_subxact(txn))
3341 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3342 NULL, InvalidXLogRecPtr, false);
/*
 * NOTE(review): the top-level lookup above is done without creation and
 * its result is dereferenced unchecked; presumably the parent of a known
 * subxact always exists -- confirm.
 */
3343 Assert(txn->base_snapshot == NULL);
3344
3345 txn->base_snapshot = snap;
3346 txn->base_snapshot_lsn = lsn;
3347 dlist_push_tail(&rb->txns_by_base_snapshot_lsn, &txn->base_snapshot_node);
3348
3350}

References Assert, AssertTXNLsnOrder(), ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_push_tail(), fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), and SnapBuildProcessChange().

◆ ReorderBufferSetRestartPoint()

void ReorderBufferSetRestartPoint ( ReorderBuffer * rb,
XLogRecPtr  ptr 
)

Definition at line 1087 of file reorderbuffer.c.

/*
 * ReorderBufferSetRestartPoint
 *		Remember ptr as rb->current_restart_decoding_lsn.
 *
 * ReorderBufferTXNByXid() copies this value into restart_decoding_lsn of
 * each transaction it creates.
 */
1088{
1089 rb->current_restart_decoding_lsn = ptr;
1090}

References fb().

Referenced by SnapBuildRestore(), and SnapBuildSerialize().

◆ ReorderBufferSkipPrepare()

void ReorderBufferSkipPrepare ( ReorderBuffer * rb,
TransactionId  xid 
)

Definition at line 2940 of file reorderbuffer.c.

/*
 * ReorderBufferSkipPrepare
 *		Note that the PREPARE of transaction xid is being skipped.
 *
 * The transaction is looked up without creating it; an unknown xid is
 * silently ignored.
 *
 * NOTE(review): listing lines 2951-2952, which act on the transaction, are
 * absent from this extract.  The References list (RBTXN_IS_PREPARED,
 * RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, txn_flags) suggests an
 * assertion on the prepare status followed by setting a flag -- confirm
 * against the full source.
 */
2941{
2942 ReorderBufferTXN *txn;
2943
2944 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false);
2945
2946 /* unknown transaction, nothing to do */
2947 if (txn == NULL)
2948 return;
2949
2950 /* txn must have been marked as a prepared transaction */
2953}

References Assert, fb(), InvalidXLogRecPtr, RBTXN_IS_PREPARED, RBTXN_PREPARE_STATUS_MASK, RBTXN_SKIPPED_PREPARE, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by DecodePrepare().

◆ ReorderBufferStreamCommit()

static void ReorderBufferStreamCommit ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
)
static

Definition at line 1983 of file reorderbuffer.c.

/*
 * ReorderBufferStreamCommit
 *		Finish a previously streamed transaction.
 *
 * For a prepared transaction the stream_prepare callback is invoked with
 * txn->final_lsn and the transaction is truncated with txn_prepared = true;
 * otherwise the stream_commit callback is invoked with txn->final_lsn.
 *
 * NOTE(review): several listing lines are absent from this extract (1986,
 * 1988, 1996, 1998, 2007, 2012).  Per the References list they involve
 * rbtxn_is_streamed, ReorderBufferStreamTXN(), RBTXN_SENT_PREPARE,
 * CheckXidAlive and ReorderBufferCleanupTXN() -- confirm in the full source.
 */
1984{
1985 /* we should only call this for previously streamed transactions */
1987
1989
1990 if (rbtxn_is_prepared(txn))
1991 {
1992 /*
1993 * Note, we send stream prepare even if a concurrent abort is
1994 * detected. See DecodePrepare for more information.
1995 */
1997 rb->stream_prepare(rb, txn, txn->final_lsn);
1999
2000 /*
2001 * This is a PREPARED transaction, part of a two-phase commit. The
2002 * full cleanup will happen as part of the COMMIT PREPAREDs, so now
2003 * just truncate txn by removing changes and tuplecids.
2004 */
2005 ReorderBufferTruncateTXN(rb, txn, true);
2006 /* Reset the CheckXidAlive */
2008 }
2009 else
2010 {
2011 rb->stream_commit(rb, txn, txn->final_lsn);
2013 }
2014}

References Assert, CheckXidAlive, fb(), ReorderBufferTXN::final_lsn, InvalidTransactionId, rbtxn_is_prepared, rbtxn_is_streamed, RBTXN_SENT_PREPARE, rbtxn_sent_prepare, ReorderBufferCleanupTXN(), ReorderBufferStreamTXN(), ReorderBufferTruncateTXN(), and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferReplay().

◆ ReorderBufferStreamTXN()

static void ReorderBufferStreamTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 4343 of file reorderbuffer.c.

/*
 * ReorderBufferStreamTXN
 *		Send the changes accumulated so far for an in-progress top-level
 *		transaction to the output plugin.
 *
 * txn must be a top-level transaction (asserted).
 *
 * On the first streaming run (txn->snapshot_now is NULL) the function
 * returns early if the transaction has no base snapshot.  Otherwise it
 * starts from FirstCommandId and a copy of the base snapshot.  On later runs
 * it continues from txn->command_id and a fresh copy of txn->snapshot_now,
 * and then clears txn->snapshot_now.
 *
 * The changes are processed by ReorderBufferProcessTXN() with its last
 * argument true.  The streaming counters (rb->streamCount, rb->streamBytes,
 * rb->streamTxns) are bumped only after that call returns, using
 * txn->total_size sampled beforehand.  On exit the transaction is asserted
 * to hold no entries.
 *
 * NOTE(review): a number of listing lines are absent from this extract
 * (e.g. 4371, 4374, 4377, 4379-4384, 4404, 4425, 4435, 4449, 4451),
 * including the assignment of txn_is_streamed and the subxact walk.
 */
4344{
4345 Snapshot snapshot_now;
4346 CommandId command_id;
4347 Size stream_bytes;
4348 bool txn_is_streamed;
4349
4350 /* We can never reach here for a subtransaction. */
4351 Assert(rbtxn_is_toptxn(txn));
4352
4353 /*
4354 * We can't make any assumptions about base snapshot here, similar to what
4355 * ReorderBufferCommit() does. That relies on base_snapshot getting
4356 * transferred from subxact in ReorderBufferCommitChild(), but that was
4357 * not yet called as the transaction is in-progress.
4358 *
4359 * So just walk the subxacts and use the same logic here. But we only need
4360 * to do that once, when the transaction is streamed for the first time.
4361 * After that we need to reuse the snapshot from the previous run.
4362 *
4363 * Unlike DecodeCommit which adds xids of all the subtransactions in
4364 * snapshot's xip array via SnapBuildCommitTxn, we can't do that here but
4365 * we do add them to subxip array instead via ReorderBufferCopySnap. This
4366 * allows the catalog changes made in subtransactions decoded till now to
4367 * be visible.
4368 */
4369 if (txn->snapshot_now == NULL)
4370 {
4372
4373 /* make sure this transaction is streamed for the first time */
4375
4376 /* at the beginning we should have invalid command ID */
4378
4380 {
4382
4385 }
4386
4387 /*
4388 * If this transaction has no snapshot, it didn't make any changes to
4389 * the database till now, so there's nothing to decode.
4390 */
4391 if (txn->base_snapshot == NULL)
4392 {
4393 Assert(txn->ninvalidations == 0);
4394 return;
4395 }
4396
4397 command_id = FirstCommandId;
4398 snapshot_now = ReorderBufferCopySnap(rb, txn->base_snapshot,
4399 txn, command_id);
4400 }
4401 else
4402 {
4403 /* the transaction must have been already streamed */
4405
4406 /*
4407 * Nah, we already have snapshot from the previous streaming run. We
4408 * assume new subxacts can't move the LSN backwards, and so can't beat
4409 * the LSN condition in the previous branch (so no need to walk
4410 * through subxacts again). In fact, we must not do that as we may be
4411 * using snapshot half-way through the subxact.
4412 */
4413 command_id = txn->command_id;
4414
4415 /*
4416 * We can't use txn->snapshot_now directly because after the last
4417 * streaming run, we might have got some new sub-transactions. So we
4418 * need to add them to the snapshot.
4419 */
4420 snapshot_now = ReorderBufferCopySnap(rb, txn->snapshot_now,
4421 txn, command_id);
4422
4423 /* Free the previously copied snapshot. */
4424 Assert(txn->snapshot_now->copied);
4426 txn->snapshot_now = NULL;
4427 }
4428
4429 /*
4430 * Remember this information to be used later to update stats. We can't
4431 * update the stats here as an error while processing the changes would
4432 * lead to the accumulation of stats even though we haven't streamed all
4433 * the changes.
4434 */
4436 stream_bytes = txn->total_size;
4437
4438 /* Process and send the changes to output plugin. */
4439 ReorderBufferProcessTXN(rb, txn, InvalidXLogRecPtr, snapshot_now,
4440 command_id, true);
4441
4442 rb->streamCount += 1;
4443 rb->streamBytes += stream_bytes;
4444
4445 /* Don't consider already streamed transaction. */
4446 rb->streamTxns += (txn_is_streamed) ? 0 : 1;
4447
4448 /* update the decoding stats */
4450
4452 Assert(txn->nentries == 0);
4453 Assert(txn->nentries_mem == 0);
4454}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::changes, ReorderBufferTXN::command_id, SnapshotData::copied, dlist_container, dlist_foreach, dlist_is_empty(), fb(), FirstCommandId, InvalidCommandId, InvalidXLogRecPtr, ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferTXN::ninvalidations, rbtxn_is_streamed, rbtxn_is_toptxn, ReorderBufferCopySnap(), ReorderBufferFreeSnap(), ReorderBufferProcessTXN(), ReorderBufferTransferSnapToParent(), ReorderBufferTXN::snapshot_now, ReorderBufferTXN::subtxns, ReorderBufferTXN::total_size, and UpdateDecodingStats().

Referenced by ReorderBufferCheckMemoryLimit(), ReorderBufferProcessPartialChange(), and ReorderBufferStreamCommit().

◆ ReorderBufferToastAppendChunk()

static void ReorderBufferToastAppendChunk ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
Relation  relation,
ReorderBufferChange * change 
)
static

Definition at line 4996 of file reorderbuffer.c.

/*
 * ReorderBufferToastAppendChunk
 *		Remember one toast chunk (an insert into a toast relation) in
 *		txn->toast_hash so the datum can be reassembled later.
 *
 * The chunk id, chunk sequence number and chunk data are read from
 * attributes 1, 2 and 3 of the change's new tuple; none may be NULL
 * (asserted).  The hash entry for the chunk id is created on first sight, in
 * which case the sequence number must be 0.  Every later chunk must carry
 * exactly last_chunk_seq + 1.  Violations raise an ERROR.  The change is
 * then linked into the entry's chunk list, and the entry's size, chunk count
 * and last sequence number are updated.
 *
 * NOTE(review): some listing lines are absent from this extract (e.g.
 * 4999-5002, 5007, 5010, 5020, 5044-5045, 5048), including the declarations
 * of ent, newtup, chunk_seq and chunksize and the chunksize computation.
 */
4998{
5001 bool found;
5003 bool isnull;
5004 Pointer chunk;
5005 TupleDesc desc = RelationGetDescr(relation);
5006 Oid chunk_id;
5008
5009 if (txn->toast_hash == NULL)
5011
5012 Assert(IsToastRelation(relation));
5013
5014 newtup = change->data.tp.newtuple;
5015 chunk_id = DatumGetObjectId(fastgetattr(newtup, 1, desc, &isnull));
5016 Assert(!isnull);
5017 chunk_seq = DatumGetInt32(fastgetattr(newtup, 2, desc, &isnull));
5018 Assert(!isnull);
5019
5021 hash_search(txn->toast_hash, &chunk_id, HASH_ENTER, &found);
5022
5023 if (!found)
5024 {
5025 Assert(ent->chunk_id == chunk_id);
5026 ent->num_chunks = 0;
5027 ent->last_chunk_seq = 0;
5028 ent->size = 0;
5029 ent->reconstructed = NULL;
5030 dlist_init(&ent->chunks);
5031
5032 if (chunk_seq != 0)
5033 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
5034 chunk_seq, chunk_id);
5035 }
/* 'found' is necessarily true here, being the else of "if (!found)" */
5036 else if (found && chunk_seq != ent->last_chunk_seq + 1)
5037 elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
5038 chunk_seq, chunk_id, ent->last_chunk_seq + 1);
5039
5040 chunk = DatumGetPointer(fastgetattr(newtup, 3, desc, &isnull));
5041 Assert(!isnull);
5042
5043 /* calculate size so we can allocate the right size at once later */
5046 else if (VARATT_IS_SHORT(chunk))
5047 /* could happen due to heap_form_tuple doing its thing */
5049 else
5050 elog(ERROR, "unexpected type of toast chunk");
5051
5052 ent->size += chunksize;
5053 ent->last_chunk_seq = chunk_seq;
5054 ent->num_chunks++;
5055 dlist_push_tail(&ent->chunks, &change->node);
5056}

References Assert, ReorderBufferChange::data, DatumGetInt32(), DatumGetObjectId(), DatumGetPointer(), dlist_init(), dlist_push_tail(), elog, ERROR, fastgetattr(), fb(), HASH_ENTER, hash_search(), IsToastRelation(), ReorderBufferChange::newtuple, ReorderBufferChange::node, RelationGetDescr, ReorderBufferToastInitHash(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, VARATT_IS_EXTENDED(), VARATT_IS_SHORT(), VARHDRSZ, VARHDRSZ_SHORT, VARSIZE(), and VARSIZE_SHORT().

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastInitHash()

static void ReorderBufferToastInitHash ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 4976 of file reorderbuffer.c.

/*
 * ReorderBufferToastInitHash
 *		Create txn->toast_hash, which must not exist yet (asserted).
 *
 * The table is named "ReorderBufferToastHash", is keyed by Oid, stores
 * ReorderBufferToastEnt entries and uses rb->context as its memory context.
 *
 * NOTE(review): listing lines 4978 (the hash_ctl declaration) and 4986 (the
 * flags argument of hash_create) are absent from this extract.  The
 * References list names HASH_ELEM, HASH_BLOBS and HASH_CONTEXT.
 */
4977{
4979
4980 Assert(txn->toast_hash == NULL);
4981
4982 hash_ctl.keysize = sizeof(Oid);
4983 hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
4984 hash_ctl.hcxt = rb->context;
4985 txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
4987}

References Assert, fb(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferToastAppendChunk().

◆ ReorderBufferToastReplace()

static void ReorderBufferToastReplace ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
Relation  relation,
ReorderBufferChange * change 
)
static

Definition at line 5079 of file reorderbuffer.c.

/*
 * ReorderBufferToastReplace
 *		Rewrite the new tuple of change so that toasted attributes whose
 *		chunks were collected in txn->toast_hash refer to reassembled
 *		in-memory data.
 *
 * Does nothing when txn->toast_hash is NULL.  Otherwise the tuple is
 * deformed and every attribute is inspected.  Dropped, non-varlena
 * (attlen != -1) and NULL attributes are skipped, as are attributes with no
 * entry in the toast hash for their va_valueid.  For the rest, a buffer of
 * va_rawsize bytes is allocated, the chunk payloads are concatenated into
 * it, and its varlena header is set as compressed or plain.  The buffer is
 * remembered in ent->reconstructed and referenced through redirect_pointer.
 *
 * The tuple is then re-formed with heap_form_tuple() and copied back over
 * newtup->t_data.  The function runs in rb->context and restores the
 * caller's memory context before returning.
 *
 * NOTE(review): many listing lines are absent from this extract (e.g. 5087,
 * 5089, 5091, 5108, 5135-5137, 5159, 5162, 5170-5171, 5219-5220, 5256,
 * 5258), including several declarations and the memory-accounting calls
 * described by the comments near the top and bottom.
 */
5081{
5082 TupleDesc desc;
5083 int natt;
5084 Datum *attrs;
5085 bool *isnull;
5086 bool *free;
5088 Relation toast_rel;
5090 MemoryContext oldcontext;
5092 Size old_size;
5093
5094 /* no toast tuples changed */
5095 if (txn->toast_hash == NULL)
5096 return;
5097
5098 /*
5099 * We're going to modify the size of the change. So, to make sure the
5100 * accounting is correct we record the current change size and then after
5101 * re-computing the change we'll subtract the recorded size and then
5102 * re-add the new change size at the end. We don't immediately subtract
5103 * the old size because if there is any error before we add the new size,
5104 * we will release the changes and that will update the accounting info
5105 * (subtracting the size from the counters). And we don't want to
5106 * underflow there.
5107 */
5109
5110 oldcontext = MemoryContextSwitchTo(rb->context);
5111
5112 /* we should only have toast tuples in an INSERT or UPDATE */
5113 Assert(change->data.tp.newtuple);
5114
5115 desc = RelationGetDescr(relation);
5116
5117 toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
5118 if (!RelationIsValid(toast_rel))
5119 elog(ERROR, "could not open toast relation with OID %u (base relation \"%s\")",
5120 relation->rd_rel->reltoastrelid, RelationGetRelationName(relation));
5121
5122 toast_desc = RelationGetDescr(toast_rel);
5123
5124 /* should we allocate from stack instead? */
5125 attrs = palloc0_array(Datum, desc->natts);
5126 isnull = palloc0_array(bool, desc->natts);
5127 free = palloc0_array(bool, desc->natts);
5128
5129 newtup = change->data.tp.newtuple;
5130
5131 heap_deform_tuple(newtup, desc, attrs, isnull);
5132
5133 for (natt = 0; natt < desc->natts; natt++)
5134 {
5138
5139 /* va_rawsize is the size of the original datum -- including header */
5140 varatt_external toast_pointer;
5143 varlena *reconstructed;
5144 dlist_iter it;
5145 Size data_done = 0;
5146
5147 if (attr->attisdropped)
5148 continue;
5149
5150 /* not a varlena datatype */
5151 if (attr->attlen != -1)
5152 continue;
5153
5154 /* no data */
5155 if (isnull[natt])
5156 continue;
5157
5158 /* ok, we know we have a toast datum */
5160
5161 /* no need to do anything if the tuple isn't external */
5163 continue;
5164
5166
5167 /*
5168 * Check whether the toast tuple changed, replace if so.
5169 */
5172 &toast_pointer.va_valueid,
5173 HASH_FIND,
5174 NULL);
5175 if (ent == NULL)
5176 continue;
5177
5178 new_datum =
5180
/* remember that attrs[natt] needs releasing in the cleanup loop below */
5181 free[natt] = true;
5182
5183 reconstructed = palloc0(toast_pointer.va_rawsize);
5184
5185 ent->reconstructed = reconstructed;
5186
5187 /* stitch toast tuple back together from its parts */
5188 dlist_foreach(it, &ent->chunks)
5189 {
5190 bool cisnull;
5193 Pointer chunk;
5194
5196 ctup = cchange->data.tp.newtuple;
5198
5199 Assert(!cisnull);
5202
5203 memcpy(VARDATA(reconstructed) + data_done,
5204 VARDATA(chunk),
5207 }
5208 Assert(data_done == VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer));
5209
5210 /* make sure its marked as compressed or not */
5211 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
5212 SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
5213 else
5214 SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
5215
5217 redirect_pointer.pointer = reconstructed;
5218
5221 sizeof(redirect_pointer));
5222
5224 }
5225
5226 /*
5227 * Build tuple in separate memory & copy tuple back into the tuplebuf
5228 * passed to the output plugin. We can't directly heap_fill_tuple() into
5229 * the tuplebuf because attrs[] will point back into the current content.
5230 */
5231 tmphtup = heap_form_tuple(desc, attrs, isnull);
5232 Assert(newtup->t_len <= MaxHeapTupleSize);
5233 Assert(newtup->t_data == (HeapTupleHeader) ((char *) newtup + HEAPTUPLESIZE));
5234
5235 memcpy(newtup->t_data, tmphtup->t_data, tmphtup->t_len);
5236 newtup->t_len = tmphtup->t_len;
5237
5238 /*
5239 * free resources we won't further need, more persistent stuff will be
5240 * free'd in ReorderBufferToastReset().
5241 */
5242 RelationClose(toast_rel);
5243 pfree(tmphtup);
5244 for (natt = 0; natt < desc->natts; natt++)
5245 {
5246 if (free[natt])
5248 }
5249 pfree(attrs);
5250 pfree(free);
5251 pfree(isnull);
5252
5253 MemoryContextSwitchTo(oldcontext);
5254
5255 /* subtract the old change size */
5257 /* now add the change back, with the correct size */
5259 ReorderBufferChangeSize(change));
5260}

References Assert, CompactAttribute::attisdropped, CompactAttribute::attlen, ReorderBufferChange::data, DatumGetPointer(), dlist_container, dlist_foreach, elog, ERROR, fastgetattr(), fb(), free, HASH_FIND, hash_search(), heap_deform_tuple(), heap_form_tuple(), HEAPTUPLESIZE, INDIRECT_POINTER_SIZE, MaxHeapTupleSize, MemoryContextSwitchTo(), TupleDescData::natts, ReorderBufferChange::newtuple, palloc0(), palloc0_array, pfree(), PointerGetDatum(), RelationData::rd_rel, RelationClose(), RelationGetDescr, RelationGetRelationName, RelationIdGetRelation(), RelationIsValid, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), SET_VARSIZE(), SET_VARSIZE_COMPRESSED(), SET_VARTAG_EXTERNAL(), ReorderBufferTXN::toast_hash, ReorderBufferChange::tp, TupleDescCompactAttr(), varatt_external::va_rawsize, varatt_external::va_valueid, VARATT_EXTERNAL_GET_EXTSIZE(), VARATT_EXTERNAL_GET_POINTER, VARATT_EXTERNAL_IS_COMPRESSED(), VARATT_IS_EXTERNAL(), VARATT_IS_SHORT(), VARDATA(), VARDATA_EXTERNAL(), VARHDRSZ, VARSIZE(), and VARTAG_INDIRECT.

Referenced by ReorderBufferProcessTXN().

◆ ReorderBufferToastReset()

static void ReorderBufferToastReset ( ReorderBuffer * rb,
ReorderBufferTXN * txn 
)
static

Definition at line 5266 of file reorderbuffer.c.

/*
 * ReorderBufferToastReset
 *		Release everything collected in txn->toast_hash.
 *
 * Does nothing when the hash does not exist.  Otherwise, for every entry
 * the reconstructed datum (if any) is pfree'd and each chunk change is
 * unlinked and handed to ReorderBufferFreeChange().  Finally
 * txn->toast_hash is reset to NULL.
 *
 * NOTE(review): listing lines 5268-5269, 5275-5276, 5278, 5286 and 5293 are
 * absent from this extract.  Per the References list they cover the
 * hash_seq_init()/hash_seq_search() walk and a hash_destroy() call.
 */
5267{
5270
5271 if (txn->toast_hash == NULL)
5272 return;
5273
5274 /* sequentially walk over the hash and free everything */
5277 {
5279
5280 if (ent->reconstructed != NULL)
5281 pfree(ent->reconstructed);
5282
5283 dlist_foreach_modify(it, &ent->chunks)
5284 {
5285 ReorderBufferChange *change =
5287
5288 dlist_delete(&change->node);
5289 ReorderBufferFreeChange(rb, change, true);
5290 }
5291 }
5292
5294 txn->toast_hash = NULL;
5295}

References dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), hash_seq_init(), hash_seq_search(), ReorderBufferChange::node, pfree(), ReorderBufferFreeChange(), and ReorderBufferTXN::toast_hash.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferFreeTXN(), ReorderBufferProcessTXN(), and ReorderBufferResetTXN().

◆ ReorderBufferTransferSnapToParent()

static void ReorderBufferTransferSnapToParent ( ReorderBufferTXN * txn,
ReorderBufferTXN * subtxn 
)
static

Definition at line 1165 of file reorderbuffer.c.

/*
 * ReorderBufferTransferSnapToParent
 *		Hand the base snapshot of subtxn over to its top-level
 *		transaction txn, if that is useful.
 *
 * subtxn must belong to txn (asserted via toplevel_xid).  Nothing happens
 * if subtxn has no base snapshot.  If txn has none, or subtxn's snapshot has
 * the lower base_snapshot_lsn, txn takes over subtxn's snapshot and LSN and
 * is placed in the base-snapshot list right before subtxn.  Otherwise the
 * reference on subtxn's snapshot is dropped.  Either way subtxn ends up
 * with no base snapshot and removed from the list.
 *
 * NOTE(review): listing lines 1181-1182, which purge txn's previous base
 * snapshot, are absent from this extract.
 */
1167{
1168 Assert(subtxn->toplevel_xid == txn->xid);
1169
1170 if (subtxn->base_snapshot != NULL)
1171 {
1172 if (txn->base_snapshot == NULL ||
1173 subtxn->base_snapshot_lsn < txn->base_snapshot_lsn)
1174 {
1175 /*
1176 * If the toplevel transaction already has a base snapshot but
1177 * it's newer than the subxact's, purge it.
1178 */
1179 if (txn->base_snapshot != NULL)
1180 {
1183 }
1184
1185 /*
1186 * The snapshot is now the top transaction's; transfer it, and
1187 * adjust the list position of the top transaction in the list by
1188 * moving it to where the subtransaction is.
1189 */
1190 txn->base_snapshot = subtxn->base_snapshot;
1191 txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
1192 dlist_insert_before(&subtxn->base_snapshot_node,
1193 &txn->base_snapshot_node);
1194
1195 /*
1196 * The subtransaction doesn't have a snapshot anymore (so it
1197 * mustn't be in the list.)
1198 */
1199 subtxn->base_snapshot = NULL;
1200 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1201 dlist_delete(&subtxn->base_snapshot_node);
1202 }
1203 else
1204 {
1205 /* Base snap of toplevel is fine, so subxact's is not needed */
1206 SnapBuildSnapDecRefcount(subtxn->base_snapshot);
1207 dlist_delete(&subtxn->base_snapshot_node);
1208 subtxn->base_snapshot = NULL;
1209 subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
1210 }
1211 }
1212}

References Assert, ReorderBufferTXN::base_snapshot, ReorderBufferTXN::base_snapshot_lsn, ReorderBufferTXN::base_snapshot_node, dlist_delete(), dlist_insert_before(), fb(), InvalidXLogRecPtr, SnapBuildSnapDecRefcount(), and ReorderBufferTXN::xid.

Referenced by ReorderBufferAssignChild(), and ReorderBufferStreamTXN().

◆ ReorderBufferTruncateTXN()

static void ReorderBufferTruncateTXN ( ReorderBuffer * rb,
ReorderBufferTXN * txn,
bool  txn_prepared 
)
static

Definition at line 1656 of file reorderbuffer.c.

/*
 * ReorderBufferTruncateTXN
 *		Throw away the changes of txn (and of its subtransactions).
 *
 * Every change on txn->changes is unlinked and freed; the freed memory is
 * accounted for in one go afterwards rather than per change.  When
 * txn_prepared is true the entries on txn->tuplecids are freed as well.
 * The tuplecid hash is dropped if present, on-disk state is cleaned up if
 * the transaction was serialized, and nentries_mem and nentries are reset
 * to zero.  The visible code does not free the txn structure itself.
 *
 * NOTE(review): several listing lines are absent from this extract (e.g.
 * 1664-1666, 1673, 1676-1677, 1699, 1705, 1722, 1738, 1745-1746, 1753),
 * including the per-subtransaction work and the memory-counter update call.
 */
1657{
1658 dlist_mutable_iter iter;
1659 Size mem_freed = 0;
1660
1661 /* cleanup subtransactions & their changes */
1662 dlist_foreach_modify(iter, &txn->subtxns)
1663 {
1665
1667
1668 /*
1669 * Subtransactions are always associated to the toplevel TXN, even if
1670 * they originally were happening inside another subtxn, so we won't
1671 * ever recurse more than one level deep here.
1672 */
1674 Assert(subtxn->nsubtxns == 0);
1675
1678 }
1679
1680 /* cleanup changes in the txn */
1681 dlist_foreach_modify(iter, &txn->changes)
1682 {
1683 ReorderBufferChange *change;
1684
1685 change = dlist_container(ReorderBufferChange, node, iter.cur);
1686
1687 /* Check we're not mixing changes from different transactions. */
1688 Assert(change->txn == txn);
1689
1690 /* remove the change from its containing list */
1691 dlist_delete(&change->node);
1692
1693 /*
1694 * Instead of updating the memory counter for individual changes, we
1695 * sum up the size of memory to free so we can update the memory
1696 * counter all together below. This saves costs of maintaining the
1697 * max-heap.
1698 */
1700
1701 ReorderBufferFreeChange(rb, change, false);
1702 }
1703
1704 /* Update the memory counter */
1706
1707 if (txn_prepared)
1708 {
1709 /*
1710 * If this is a prepared txn, cleanup the tuplecids we stored for
1711 * decoding catalog snapshot access. They are always stored in the
1712 * toplevel transaction.
1713 */
1714 dlist_foreach_modify(iter, &txn->tuplecids)
1715 {
1716 ReorderBufferChange *change;
1717
1718 change = dlist_container(ReorderBufferChange, node, iter.cur);
1719
1720 /* Check we're not mixing changes from different transactions. */
1721 Assert(change->txn == txn);
1723
1724 /* Remove the change from its containing list. */
1725 dlist_delete(&change->node);
1726
1727 ReorderBufferFreeChange(rb, change, true);
1728 }
1729 }
1730
1731 /*
1732 * Destroy the (relfilelocator, ctid) hashtable, so that we don't leak any
1733 * memory. We could also keep the hash table and update it with new ctid
1734 * values, but this seems simpler and good enough for now.
1735 */
1736 if (txn->tuplecid_hash != NULL)
1737 {
1739 txn->tuplecid_hash = NULL;
1740 }
1741
1742 /* If this txn is serialized then clean the disk space. */
1743 if (rbtxn_is_serialized(txn))
1744 {
1747
1748 /*
1749 * We set this flag to indicate if the transaction is ever serialized.
1750 * We need this to accurately update the stats as otherwise the same
1751 * transaction can be counted as serialized multiple times.
1752 */
1754 }
1755
1756 /* also reset the number of entries in the transaction */
1757 txn->nentries_mem = 0;
1758 txn->nentries = 0;
1759}

References ReorderBufferChange::action, Assert, ReorderBufferTXN::changes, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, fb(), hash_destroy(), ReorderBufferTXN::nentries, ReorderBufferTXN::nentries_mem, ReorderBufferChange::node, rbtxn_is_known_subxact, rbtxn_is_serialized, RBTXN_IS_SERIALIZED_CLEAR, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, ReorderBufferChangeMemoryUpdate(), ReorderBufferChangeSize(), ReorderBufferFreeChange(), ReorderBufferMaybeMarkTXNStreamed(), ReorderBufferRestoreCleanup(), ReorderBufferTruncateTXN(), ReorderBufferTXN::subtxns, ReorderBufferTXN::tuplecid_hash, ReorderBufferTXN::tuplecids, ReorderBufferChange::txn, and ReorderBufferTXN::txn_flags.

Referenced by ReorderBufferCheckAndTruncateAbortedTXN(), ReorderBufferProcessTXN(), ReorderBufferResetTXN(), ReorderBufferStreamCommit(), and ReorderBufferTruncateTXN().

◆ ReorderBufferTXNByXid()

static ReorderBufferTXN * ReorderBufferTXNByXid ( ReorderBuffer * rb,
TransactionId  xid,
bool  create,
bool * is_new,
XLogRecPtr  lsn,
bool  create_as_top 
)
static

Definition at line 653 of file reorderbuffer.c.

/*
 * ReorderBufferTXNByXid
 *		Return the ReorderBufferTXN for xid, optionally creating it.
 *
 * rb->by_txn_last_xid / rb->by_txn_last_txn form a one-entry cache that is
 * consulted first and refreshed on every full lookup.  The cache may also
 * hold a negative result (txn == NULL) for its xid.
 *
 * create		- create the transaction if it is not known yet
 * is_new		- if not NULL, set to false on a cache hit and otherwise to
 *				  whether the hash lookup failed to find an entry
 * lsn			- stored as first_lsn of a newly created transaction
 * create_as_top - when creating, also append the transaction to
 *				  rb->toplevel_by_lsn
 *
 * Returns NULL only if the transaction is unknown and create is false.
 *
 * NOTE(review): listing lines 657, 660, 693, 704-706 and 715 are absent
 * from this extract, among them the declaration and assignment of ent and
 * the allocation of the new transaction.
 */
655{
656 ReorderBufferTXN *txn;
658 bool found;
659
661
662 /*
663 * Check the one-entry lookup cache first
664 */
665 if (TransactionIdIsValid(rb->by_txn_last_xid) &&
666 rb->by_txn_last_xid == xid)
667 {
668 txn = rb->by_txn_last_txn;
669
670 if (txn != NULL)
671 {
672 /* found it, and it's valid */
673 if (is_new)
674 *is_new = false;
675 return txn;
676 }
677
678 /*
679 * cached as non-existent, and asked not to create? Then nothing else
680 * to do.
681 */
682 if (!create)
683 return NULL;
684 /* otherwise fall through to create it */
685 }
686
687 /*
688 * If the cache wasn't hit or it yielded a "does-not-exist" and we want to
689 * create an entry.
690 */
691
692 /* search the lookup table */
694 hash_search(rb->by_txn,
695 &xid,
696 create ? HASH_ENTER : HASH_FIND,
697 &found);
698 if (found)
699 txn = ent->txn;
700 else if (create)
701 {
702 /* initialize the new entry, if creation was requested */
703 Assert(ent != NULL);
705
707 ent->txn->xid = xid;
708 txn = ent->txn;
709 txn->first_lsn = lsn;
710 txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
711
712 if (create_as_top)
713 {
714 dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
716 }
717 }
718 else
719 txn = NULL; /* not found and not asked to create */
720
721 /* update cache */
722 rb->by_txn_last_xid = xid;
723 rb->by_txn_last_txn = txn;
724
725 if (is_new)
726 *is_new = !found;
727
728 Assert(!create || txn != NULL);
729 return txn;
730}

References Assert, AssertTXNLsnOrder(), dlist_push_tail(), fb(), ReorderBufferTXN::first_lsn, HASH_ENTER, HASH_FIND, hash_search(), ReorderBufferTXN::node, ReorderBufferAllocTXN(), ReorderBufferTXN::restart_decoding_lsn, TransactionIdIsValid, and XLogRecPtrIsValid.

Referenced by ReorderBufferAbort(), ReorderBufferAddDistributedInvalidations(), ReorderBufferAddInvalidations(), ReorderBufferAddNewTupleCids(), ReorderBufferAssignChild(), ReorderBufferCommit(), ReorderBufferCommitChild(), ReorderBufferFinishPrepared(), ReorderBufferForget(), ReorderBufferGetInvalidations(), ReorderBufferInvalidate(), ReorderBufferPrepare(), ReorderBufferProcessXid(), ReorderBufferQueueChange(), ReorderBufferQueueMessage(), ReorderBufferRememberPrepareInfo(), ReorderBufferSetBaseSnapshot(), ReorderBufferSkipPrepare(), ReorderBufferXidHasBaseSnapshot(), ReorderBufferXidHasCatalogChanges(), and ReorderBufferXidSetCatalogChanges().

◆ ReorderBufferTXNSizeCompare()

static int ReorderBufferTXNSizeCompare ( const pairingheap_node * a,
const pairingheap_node * b,
void * arg 
)
static

Definition at line 3790 of file reorderbuffer.c.

/*
 * ReorderBufferTXNSizeCompare
 *		Three-way comparison of two entries by their 'size' field.
 *
 * Returns -1 if ta is smaller than tb, 1 if it is larger, and 0 if both
 * sizes are equal.
 *
 * NOTE(review): listing lines 3792-3793, which derive ta and tb from a and
 * b, are absent from this extract.  The References list names
 * pairingheap_const_container.
 */
3791{
3794
3795 if (ta->size < tb->size)
3796 return -1;
3797 if (ta->size > tb->size)
3798 return 1;
3799 return 0;
3800}

References a, b, fb(), and pairingheap_const_container.

Referenced by ReorderBufferAllocate().

◆ ReorderBufferXidHasBaseSnapshot()

bool ReorderBufferXidHasBaseSnapshot ( ReorderBuffer * rb,
TransactionId  xid 
)

Definition at line 3743 of file reorderbuffer.c.

/*
 * ReorderBufferXidHasBaseSnapshot
 *		Report whether transaction xid already has a base snapshot.
 *
 * The transaction is looked up without creating it; an unknown xid yields
 * false.  For a known subtransaction the answer is taken from its top-level
 * transaction.
 */
3744{
3745 ReorderBufferTXN *txn;
3746
3747 txn = ReorderBufferTXNByXid(rb, xid, false,
3748 NULL, InvalidXLogRecPtr, false);
3749
3750 /* transaction isn't known yet, ergo no snapshot */
3751 if (txn == NULL)
3752 return false;
3753
3754 /* a known subtxn? operate on top-level txn instead */
3755 if (rbtxn_is_known_subxact(txn))
3756 txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, false,
3757 NULL, InvalidXLogRecPtr, false);
3758
/*
 * NOTE(review): the top-level lookup above can in principle return NULL
 * (create is false) and is dereferenced unchecked here; presumably the
 * parent of a known subxact always exists -- confirm.
 */
3759 return txn->base_snapshot != NULL;
3760}

References ReorderBufferTXN::base_snapshot, fb(), InvalidXLogRecPtr, rbtxn_is_known_subxact, ReorderBufferTXNByXid(), and ReorderBufferTXN::toplevel_xid.

Referenced by SnapBuildCommitTxn(), SnapBuildDistributeSnapshotAndInval(), and SnapBuildProcessChange().

◆ ReorderBufferXidHasCatalogChanges()

bool ReorderBufferXidHasCatalogChanges ( ReorderBuffer * rb,
TransactionId  xid 
)

Definition at line 3726 of file reorderbuffer.c.

/*
 * ReorderBufferXidHasCatalogChanges
 *		Report whether transaction xid is marked as having catalog changes.
 *
 * The transaction is looked up without creating it; an unknown xid yields
 * false.  Otherwise the result is rbtxn_has_catalog_changes() for exactly
 * that transaction (no redirection to a top-level transaction here).
 */
3727{
3728 ReorderBufferTXN *txn;
3729
3730 txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
3731 false);
3732 if (txn == NULL)
3733 return false;
3734
3735 return rbtxn_has_catalog_changes(txn);
3736}

References fb(), InvalidXLogRecPtr, rbtxn_has_catalog_changes, and ReorderBufferTXNByXid().

Referenced by SnapBuildXidHasCatalogChanges().

◆ ReorderBufferXidSetCatalogChanges()

void ReorderBufferXidSetCatalogChanges ( ReorderBuffer * rb,
TransactionId  xid,
XLogRecPtr  lsn 
)

Definition at line 3654 of file reorderbuffer.c.

/*
 * ReorderBufferXidSetCatalogChanges
 *		Mark transaction xid as having made catalog changes.
 *
 * The transaction is created if it is not known yet.  If it is not marked
 * already, it is appended to rb->catchange_txns.  When it is a
 * subtransaction, its top-level transaction receives the same treatment.
 * The "not yet marked" checks keep a transaction from being pushed onto the
 * list more than once by this function.
 *
 * NOTE(review): listing lines 3663 and 3679 are absent from this extract.
 * The References list (RBTXN_HAS_CATALOG_CHANGES, txn_flags) suggests they
 * set the flag on txn and toptxn respectively -- confirm.
 */
3656{
3657 ReorderBufferTXN *txn;
3658
3659 txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
3660
3661 if (!rbtxn_has_catalog_changes(txn))
3662 {
3664 dclist_push_tail(&rb->catchange_txns, &txn->catchange_node);
3665 }
3666
3667 /*
3668 * Mark top-level transaction as having catalog changes too if one of its
3669 * children has so that the ReorderBufferBuildTupleCidHash can
3670 * conveniently check just top-level transaction and decide whether to
3671 * build the hash table or not.
3672 */
3673 if (rbtxn_is_subtxn(txn))
3674 {
3675 ReorderBufferTXN *toptxn = rbtxn_get_toptxn(txn);
3676
3677 if (!rbtxn_has_catalog_changes(toptxn))
3678 {
3680 dclist_push_tail(&rb->catchange_txns, &toptxn->catchange_node);
3681 }
3682 }
3683}

References ReorderBufferTXN::catchange_node, dclist_push_tail(), fb(), rbtxn_get_toptxn, RBTXN_HAS_CATALOG_CHANGES, rbtxn_has_catalog_changes, rbtxn_is_subtxn, ReorderBufferTXNByXid(), and ReorderBufferTXN::txn_flags.

Referenced by SnapBuildProcessNewCid(), and xact_decode().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( HTAB *tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId *cmin,
CommandId *cmax 
)

Definition at line 5558 of file reorderbuffer.c.

5562{
5563 ReorderBufferTupleCidKey key;
5564 ReorderBufferTupleCidEnt *ent;
5565 ForkNumber forkno;
5566 BlockNumber blockno;
5567 bool updated_mapping = false;
5568
5569 /*
5570 * Return unresolved if tuplecid_data is not valid. That's because when
5571 * streaming in-progress transactions we may run into tuples with the CID
5572 * before actually decoding them. Think e.g. about INSERT followed by
5573 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5574 * INSERT. So in such cases, we assume the CID is from the future
5575 * command.
5576 */
5577 if (tuplecid_data == NULL)
5578 return false;
5579
5580 /* be careful about padding */
5581 memset(&key, 0, sizeof(key));
5582
5583 Assert(!BufferIsLocal(buffer));
5584
5585 /*
5586 * get relfilelocator from the buffer, no convenient way to access it
5587 * other than that.
5588 */
5589 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5590
5591 /* tuples can only be in the main fork */
5592 Assert(forkno == MAIN_FORKNUM);
5593 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5594
5595 ItemPointerCopy(&htup->t_self,
5596 &key.tid);
5597
5598restart:
5599 ent = (ReorderBufferTupleCidEnt *)
5600 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5601
5602 /*
5603 * failed to find a mapping, check whether the table was rewritten and
5604 * apply mapping if so, but only do that once - there can be no new
5605 * mappings while we are in here since we have to hold a lock on the
5606 * relation.
5607 */
5608 if (ent == NULL && !updated_mapping)
5609 {
5610 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5611 /* now check but don't update for a mapping again */
5612 updated_mapping = true;
5613 goto restart;
5614 }
5615 else if (ent == NULL)
5616 return false;
5617
5618 if (cmin)
5619 *cmin = ent->cmin;
5620 if (cmax)
5621 *cmax = ent->cmax;
5622 return true;
5623}

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ SetupCheckXidLive()

static void SetupCheckXidLive ( TransactionId  xid)
inline static

Definition at line 2049 of file reorderbuffer.c.

2050{
2051 /*
2052 * If the input transaction id is already set as a CheckXidAlive then
2053 * nothing to do.
2054 */
2055 if (TransactionIdEquals(CheckXidAlive, xid))
2056 return;
2057
2058 /*
2059 * setup CheckXidAlive if it's not committed yet. We don't check if the
2060 * xid is aborted. That will happen during catalog access.
2061 */
2062 if (!TransactionIdDidCommit(xid))
2063 CheckXidAlive = xid;
2064 else
2065 CheckXidAlive = InvalidTransactionId;
2066}

References CheckXidAlive, InvalidTransactionId, TransactionIdDidCommit(), and TransactionIdEquals.

Referenced by ReorderBufferProcessTXN().

◆ StartupReorderBuffer()

void StartupReorderBuffer ( void  )

Definition at line 4942 of file reorderbuffer.c.

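4943{
4944 DIR *logical_dir;
4945 struct dirent *logical_de;
4946
4947 logical_dir = AllocateDir(PG_REPLSLOT_DIR);
4948 while ((logical_de = ReadDir(logical_dir, PG_REPLSLOT_DIR)) != NULL)
4949 {
4950 if (strcmp(logical_de->d_name, ".") == 0 ||
4951 strcmp(logical_de->d_name, "..") == 0)
4952 continue;
4953
4954 /* if it cannot be a slot, skip the directory */
4955 if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2))
4956 continue;
4957
4958 /*
4959 * ok, has to be a surviving logical slot, iterate and delete
4960 * everything starting with xid-*
4961 */
4962 ReorderBufferCleanupSerializedTXNs(logical_de->d_name);
4963 }
4964 FreeDir(logical_dir);
4965}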

References AllocateDir(), DEBUG2, fb(), FreeDir(), PG_REPLSLOT_DIR, ReadDir(), ReorderBufferCleanupSerializedTXNs(), and ReplicationSlotValidateName().

Referenced by StartupXLOG().

◆ TransactionIdInArray()

static bool TransactionIdInArray ( TransactionId  xid,
TransactionId *xip,
Size  num 
)
static

Definition at line 5457 of file reorderbuffer.c.

5458{
5459 return bsearch(&xid, xip, num,
5460 sizeof(TransactionId), xidComparator) != NULL;
5461}

References fb(), and xidComparator().

Referenced by UpdateLogicalMappings().

◆ UpdateLogicalMappings()

static void UpdateLogicalMappings ( HTAB *tuplecid_data,
Oid  relid,
Snapshot  snapshot 
)
static

Definition at line 5480 of file reorderbuffer.c.

5481{
5482 DIR *mapping_dir;
5483 struct dirent *mapping_de;
5484 List *files = NIL;
5485 ListCell *file;
5486 Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
5487
5488 mapping_dir = AllocateDir(PG_LOGICAL_MAPPINGS_DIR);
5489 while ((mapping_de = ReadDir(mapping_dir, PG_LOGICAL_MAPPINGS_DIR)) != NULL)
5490 {
5491 Oid f_dboid;
5492 Oid f_relid;
5493 TransactionId f_mapped_xid;
5494 TransactionId f_create_xid;
5495 XLogRecPtr f_lsn;
5496 uint32 f_hi,
5497 f_lo;
5498 RewriteMappingFile *f;
5499
5500 if (strcmp(mapping_de->d_name, ".") == 0 ||
5501 strcmp(mapping_de->d_name, "..") == 0)
5502 continue;
5503
5504 /* Ignore files that aren't ours */
5505 if (strncmp(mapping_de->d_name, "map-", 4) != 0)
5506 continue;
5507
5508 if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
5509 &f_dboid, &f_relid, &f_hi, &f_lo,
5510 &f_mapped_xid, &f_create_xid) != 6)
5511 elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
5512
5513 f_lsn = ((uint64) f_hi) << 32 | f_lo;
5514
5515 /* mapping for another database */
5516 if (f_dboid != dboid)
5517 continue;
5518
5519 /* mapping for another relation */
5520 if (f_relid != relid)
5521 continue;
5522
5523 /* did the creating transaction abort? */
5524 if (!TransactionIdDidCommit(f_create_xid))
5525 continue;
5526
5527 /* not for our transaction */
5528 if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
5529 continue;
5530
5531 /* ok, relevant, queue for apply */
5532 f = palloc_object(RewriteMappingFile);
5533 f->lsn = f_lsn;
5534 strcpy(f->fname, mapping_de->d_name);
5535 files = lappend(files, f);
5536 }
5537 FreeDir(mapping_dir);
5538
5539 /* sort files so we apply them in LSN order */
5540 list_sort(files, file_sort_by_lsn);
5541
5542 foreach(file, files)
5543 {
5544 RewriteMappingFile *f = (RewriteMappingFile *) lfirst(file);
5545
5546 elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
5547 snapshot->subxip[0]);
5548 ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
5549 pfree(f);
5550 }
5551}

References AllocateDir(), ApplyLogicalMappingFile(), DEBUG1, elog, ERROR, fb(), file_sort_by_lsn(), RewriteMappingFile::fname, FreeDir(), InvalidOid, IsSharedRelation(), lappend(), lfirst, list_sort(), LOGICAL_REWRITE_FORMAT, RewriteMappingFile::lsn, MyDatabaseId, NIL, palloc_object, pfree(), PG_LOGICAL_MAPPINGS_DIR, ReadDir(), SnapshotData::subxcnt, SnapshotData::subxip, TransactionIdDidCommit(), TransactionIdInArray(), and tuplecid_data.

Referenced by ResolveCminCmaxDuringDecoding().

Variable Documentation

◆ debug_logical_replication_streaming

◆ logical_decoding_work_mem

int logical_decoding_work_mem

Definition at line 225 of file reorderbuffer.c.

Referenced by ReorderBufferCheckMemoryLimit().

◆ max_changes_in_memory

const Size max_changes_in_memory = 4096
static

Definition at line 226 of file reorderbuffer.c.

Referenced by ReorderBufferRestoreChanges().