walsender_8c_source.html

/*-------------------------------------------------------------------------

 *

 * walsender.c

 *

 * The WAL sender process (walsender) is new as of Postgres 9.0. It takes

 * care of sending XLOG from the primary server to a single recipient.

 * (Note that there can be more than one walsender process concurrently.)

 * It is started by the postmaster when the walreceiver of a standby server

 * connects to the primary server and requests XLOG streaming replication.

 *

 * A walsender is similar to a regular backend, ie. there is a one-to-one

 * relationship between a connection and a walsender process, but instead

 * of processing SQL queries, it understands a small set of special

 * replication-mode commands. The START_REPLICATION command begins streaming

 * WAL to the client. While streaming, the walsender keeps reading XLOG

 * records from the disk and sends them to the standby server over the

 * COPY protocol, until either side ends the replication by exiting COPY

 * mode (or until the connection is closed).

 *

 * Normal termination is by SIGTERM, which instructs the walsender to

 * close the connection and exit(0) at the next convenient moment. Emergency

 * termination is by SIGQUIT; like any backend, the walsender will simply

 * abort and exit on SIGQUIT. A close of the connection and a FATAL error

 * are treated as not a crash but approximately normal termination;

 * the walsender will exit quickly without sending any more XLOG records.

 *

 * If the server is shut down, checkpointer sends us

 * PROCSIG_WALSND_INIT_STOPPING after all regular backends have exited.  If

 * the backend is idle or runs an SQL query this causes the backend to

 * shut down; if logical replication is in progress, all existing WAL records

 * are processed followed by a shutdown.  Otherwise this causes the walsender

 * to switch to the "stopping" state. In this state, the walsender will reject

 * any further replication commands. The checkpointer begins the shutdown

 * checkpoint once all walsenders are confirmed as stopping. When the shutdown

 * checkpoint finishes, the postmaster sends us SIGUSR2. This instructs

 * walsender to send any outstanding WAL, including the shutdown checkpoint

 * record, wait for it to be replicated to the standby, and then exit.

 *

 *

 * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group

 *

 * IDENTIFICATION

 *    src/backend/replication/walsender.c

 *

 *-------------------------------------------------------------------------

 */

#include "postgres.h"


#include <signal.h>

#include <unistd.h>


#include "access/timeline.h"

#include "access/transam.h"

#include "access/xact.h"

#include "access/xlog_internal.h"

#include "access/xlogreader.h"

#include "access/xlogrecovery.h"

#include "access/xlogutils.h"

#include "backup/basebackup.h"

#include "backup/basebackup_incremental.h"

#include "catalog/pg_authid.h"

#include "catalog/pg_type.h"

#include "commands/dbcommands.h"

#include "commands/defrem.h"

#include "funcapi.h"

#include "libpq/libpq.h"

#include "libpq/pqformat.h"

#include "miscadmin.h"

#include "nodes/replnodes.h"

#include "pgstat.h"

#include "postmaster/interrupt.h"

#include "replication/decode.h"

#include "replication/logical.h"

#include "replication/slotsync.h"

#include "replication/slot.h"

#include "replication/snapbuild.h"

#include "replication/syncrep.h"

#include "replication/walreceiver.h"

#include "replication/walsender.h"

#include "replication/walsender_private.h"

#include "storage/condition_variable.h"

#include "storage/aio_subsys.h"

#include "storage/fd.h"

#include "storage/ipc.h"

#include "storage/pmsignal.h"

#include "storage/proc.h"

#include "tcop/dest.h"

#include "tcop/tcopprot.h"

#include "utils/acl.h"

#include "utils/builtins.h"

#include "utils/guc.h"

#include "utils/memutils.h"

#include "utils/pg_lsn.h"

#include "utils/pgstat_internal.h"

#include "utils/ps_status.h"

#include "utils/timeout.h"

#include "utils/timestamp.h"


/* Minimum interval used by walsender for stats flushes, in ms */

#define WALSENDER_STATS_FLUSH_INTERVAL         1000


/*

 * Maximum data payload in a WAL data message.  Must be >= XLOG_BLCKSZ.

 *

 * We don't have a good idea of what a good value would be; there's some

 * overhead per message in both walsender and walreceiver, but on the other

 * hand sending large batches makes walsender less responsive to signals

 * because signals are checked only between messages.  128kB (with

 * default 8k blocks) seems like a reasonable guess for now.

 */

#define MAX_SEND_SIZE (XLOG_BLCKSZ * 16)


/* Array of WalSnds in shared memory */

WalSndCtlData *WalSndCtl = NULL;


/* My slot in the shared memory array */

WalSnd     *MyWalSnd = NULL;


/* Global state */

bool        am_walsender = false;   /* Am I a walsender process? */

bool        am_cascading_walsender = false; /* Am I cascading WAL to another

                                             * standby? */

bool        am_db_walsender = false;    /* Connected to a database? */


/* GUC variables */

int         max_wal_senders = 10;   /* the maximum number of concurrent

                                     * walsenders */

int         wal_sender_timeout = 60 * 1000; /* maximum time to send one WAL

                                             * data message */

bool        log_replication_commands = false;


/*

 * State for WalSndWakeupRequest

 */

bool        wake_wal_senders = false;


/*

 * xlogreader used for replication.  Note that a WAL sender doing physical

 * replication does not need xlogreader to read WAL, but it needs one to

 * keep a state of its work.

 */

static XLogReaderState *xlogreader = NULL;


/*

 * If the UPLOAD_MANIFEST command is used to provide a backup manifest in

 * preparation for an incremental backup, uploaded_manifest will point

 * to an object containing information about its contents, and

 * uploaded_manifest_mcxt will point to the memory context that contains

 * that object and all of its subordinate data. Otherwise, both values will

 * be NULL.

 */

static IncrementalBackupInfo *uploaded_manifest = NULL;

static MemoryContext uploaded_manifest_mcxt = NULL;


/*

 * These variables keep track of the state of the timeline we're currently

 * sending. sendTimeLine identifies the timeline. If sendTimeLineIsHistoric,

 * the timeline is not the latest timeline on this server, and the server's

 * history forked off from that timeline at sendTimeLineValidUpto.

 */

static TimeLineID sendTimeLine = 0;

static TimeLineID sendTimeLineNextTLI = 0;

static bool sendTimeLineIsHistoric = false;

static XLogRecPtr sendTimeLineValidUpto = InvalidXLogRecPtr;


/*

 * How far have we sent WAL already? This is also advertised in

 * MyWalSnd->sentPtr.  (Actually, this is the next WAL location to send.)

 */

static XLogRecPtr sentPtr = InvalidXLogRecPtr;


/* Buffers for constructing outgoing messages and processing reply messages. */

static StringInfoData output_message;

static StringInfoData reply_message;

static StringInfoData tmpbuf;


/* Timestamp of last ProcessRepliesIfAny(). */

static TimestampTz last_processing = 0;


/*

 * Timestamp of last ProcessRepliesIfAny() that saw a reply from the

 * standby. Set to 0 if wal_sender_timeout doesn't need to be active.

 */

static TimestampTz last_reply_timestamp = 0;


/* Have we sent a heartbeat message asking for reply, since last reply? */

static bool waiting_for_ping_response = false;


/*

 * While streaming WAL in Copy mode, streamingDoneSending is set to true

 * after we have sent CopyDone. We should not send any more CopyData messages

 * after that. streamingDoneReceiving is set to true when we receive CopyDone

 * from the other end. When both become true, it's time to exit Copy mode.

 */

static bool streamingDoneSending;

static bool streamingDoneReceiving;


/* Are we there yet? */

static bool WalSndCaughtUp = false;


/* Flags set by signal handlers for later service in main loop */

static volatile sig_atomic_t got_SIGUSR2 = false;

static volatile sig_atomic_t got_STOPPING = false;


/*

 * This is set while we are streaming. When not set

 * PROCSIG_WALSND_INIT_STOPPING signal will be handled like SIGTERM. When set,

 * the main loop is responsible for checking got_STOPPING and terminating when

 * it's set (after streaming any remaining WAL).

 */

static volatile sig_atomic_t replication_active = false;


static LogicalDecodingContext *logical_decoding_ctx = NULL;


/* A sample associating a WAL location with the time it was written. */
typedef struct
{
    XLogRecPtr  lsn;            /* WAL location of the sample */
    TimestampTz time;           /* timestamp paired with that location */
} WalTimeSample;

/* The size of our buffer of time samples. */
#define LAG_TRACKER_BUFFER_SIZE 8192

/*
 * A mechanism for tracking replication lag.
 *
 * Holds a fixed array of LAG_TRACKER_BUFFER_SIZE samples plus one read
 * position and one remembered sample per NUM_SYNC_REP_WAIT_MODE entry.
 * NOTE(review): the code that maintains these fields (LagTrackerWrite /
 * LagTrackerRead) is not visible in this part of the file, so the per-field
 * notes below are assumptions based on the names; confirm against it.
 */
typedef struct
{
    XLogRecPtr  last_lsn;       /* presumably the most recently recorded LSN */
    WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE];  /* sample storage */
    int         write_head;     /* presumably the next index to write in
                                 * buffer */
    int         read_heads[NUM_SYNC_REP_WAIT_MODE]; /* one read index per
                                                     * wait mode */
    WalTimeSample last_read[NUM_SYNC_REP_WAIT_MODE];    /* one sample per
                                                         * wait mode */
} LagTracker;

/* Per-process tracker; allocated zero-filled by InitWalSender(). */
static LagTracker *lag_tracker;


/* Signal handlers */

static void WalSndLastCycleHandler(SIGNAL_ARGS);


/* Prototypes for private functions */

typedef void (*WalSndSendDataCallback) (void);

static void WalSndLoop(WalSndSendDataCallback send_data);

static void InitWalSenderSlot(void);

static void WalSndKill(int code, Datum arg);

pg_noreturn static void WalSndShutdown(void);

static void XLogSendPhysical(void);

static void XLogSendLogical(void);

static void WalSndDone(WalSndSendDataCallback send_data);

static void IdentifySystem(void);

static void UploadManifest(void);

static bool HandleUploadManifestPacket(StringInfo buf, off_t *offset,

                                       IncrementalBackupInfo *ib);

static void ReadReplicationSlot(ReadReplicationSlotCmd *cmd);

static void CreateReplicationSlot(CreateReplicationSlotCmd *cmd);

static void DropReplicationSlot(DropReplicationSlotCmd *cmd);

static void StartReplication(StartReplicationCmd *cmd);

static void StartLogicalReplication(StartReplicationCmd *cmd);

static void ProcessStandbyMessage(void);

static void ProcessStandbyReplyMessage(void);

static void ProcessStandbyHSFeedbackMessage(void);

static void ProcessRepliesIfAny(void);

static void ProcessPendingWrites(void);

static void WalSndKeepalive(bool requestReply, XLogRecPtr writePtr);

static void WalSndKeepaliveIfNecessary(void);

static void WalSndCheckTimeOut(void);

static long WalSndComputeSleeptime(TimestampTz now);

static void WalSndWait(uint32 socket_events, long timeout, uint32 wait_event);

static void WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write);

static void WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write);

static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,

                                 bool skipped_xact);

static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc);

static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);

static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);

static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);


static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,

                              TimeLineID *tli_p);


/*
 * Initialize walsender process before entering the main command loop.
 *
 * In order, this: records whether the server is in recovery (cascading
 * walsender), sets up this process's walsender slot, creates the auxiliary
 * process resource owner, declares the process to the postmaster as a WAL
 * sender, marks database-less connections as affecting all vacuum horizons,
 * and allocates the zero-filled lag tracker in TopMemoryContext.
 *
 * Takes no arguments and returns nothing; the results are left in the
 * file-level and process-level state it updates.
 */
void
InitWalSender(void)
{
    am_cascading_walsender = RecoveryInProgress();

    /* Create a per-walsender data structure in shared memory */
    InitWalSenderSlot();

    /* need resource owner for e.g. basebackups */
    CreateAuxProcessResourceOwner();

    /*
     * Let postmaster know that we're a WAL sender. Once we've declared us as
     * a WAL sender process, postmaster will let us outlive the bgwriter and
     * kill us last in the shutdown sequence, so we get a chance to stream all
     * remaining WAL at shutdown, including the shutdown checkpoint. Note that
     * there's no going back, and we mustn't write any WAL records after this.
     */
    MarkPostmasterChildWalSender();
    SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

    /*
     * If the client didn't specify a database to connect to, show in PGPROC
     * that our advertised xmin should affect vacuum horizons in all
     * databases.  This allows physical replication clients to send hot
     * standby feedback that will delay vacuum cleanup in all databases.
     */
    if (MyDatabaseId == InvalidOid)
    {
        Assert(MyProc->xmin == InvalidTransactionId);
        /* Change our flags and the ProcGlobal copy under ProcArrayLock. */
        LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
        MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS;
        ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
        LWLockRelease(ProcArrayLock);
    }

    /* Initialize empty timestamp buffer for lag tracking. */
    lag_tracker = MemoryContextAllocZero(TopMemoryContext, sizeof(LagTracker));
}


/*
 * Error recovery for a WAL sender.
 *
 * A walsender does not run its commands inside transactions the way a
 * regular backend does, so there is no transaction abort to undo a failed
 * command.  This routine plays that role: it lets go of locks, waits and I/O
 * state, closes the WAL segment, gives up the replication slot, and either
 * exits (when a stop was requested) or returns the walsender to its startup
 * state.
 */
void
WalSndErrorCleanup(void)
{
    /* Let go of locks, condition-variable sleeps, wait events and AIO state. */
    LWLockReleaseAll();
    ConditionVariableCancelSleep();
    pgstat_report_wait_end();
    pgaio_error_cleanup();

    /* Close the WAL segment if a reader exists and has a file open. */
    if (xlogreader != NULL)
    {
        if (xlogreader->seg.ws_file >= 0)
        {
            wal_segment_close(xlogreader);
        }
    }

    /* Give up the slot we hold, if any, then run the general slot cleanup. */
    if (MyReplicationSlot != NULL)
    {
        ReplicationSlotRelease();
    }
    ReplicationSlotCleanup(false);

    /* We are no longer streaming. */
    replication_active = false;

    /*
     * An in-progress transaction takes care of our ResourceOwner by itself.
     * When a replication command created a resource owner with no
     * transaction around it, the release has to happen here instead.
     */
    if (!IsTransactionOrTransactionBlock())
    {
        ReleaseAuxProcessResources(false);
    }

    /* A stop was already requested: exit rather than resume. */
    if (got_STOPPING || got_SIGUSR2)
    {
        proc_exit(0);
    }

    /* Otherwise go back to the startup state. */
    WalSndSetState(WALSNDSTATE_STARTUP);
}


/*
 * Orderly exit after the client's connection has been aborted.
 *
 * Does not return: the process exits with status 0.
 */
static void
WalSndShutdown(void)
{
    /*
     * Stop routing output to the remote end first, so that ereport will not
     * attempt to send any further messages to the standby.
     */
    if (whereToSendOutput == DestRemote)
    {
        whereToSendOutput = DestNone;
    }

    proc_exit(0);

    /* keep the compiler quiet */
    abort();
}


/*

 * Handle the IDENTIFY_SYSTEM command.

 */

static void

IdentifySystem(void)

{

    char        sysid[32];

    char        xloc[MAXFNAMELEN];

    XLogRecPtr  logptr;

    char       *dbname = NULL;

    DestReceiver *dest;

    TupOutputState *tstate;

    TupleDesc   tupdesc;

    Datum       values[4];

    bool        nulls[4] = {0};

    TimeLineID  currTLI;


    /*

     * Reply with a result set with one row, four columns. First col is system

     * ID, second is timeline ID, third is current xlog location and the

     * fourth contains the database name if we are connected to one.

     */


    snprintf(sysid, sizeof(sysid), UINT64_FORMAT,

             GetSystemIdentifier());


    am_cascading_walsender = RecoveryInProgress();

    if (am_cascading_walsender)

        logptr = GetStandbyFlushRecPtr(&currTLI);

    else

        logptr = GetFlushRecPtr(&currTLI);


    snprintf(xloc, sizeof(xloc), "%X/%X", LSN_FORMAT_ARGS(logptr));


    if (MyDatabaseId != InvalidOid)

    {

        MemoryContext cur = CurrentMemoryContext;


        /* syscache access needs a transaction env. */

        StartTransactionCommand();

        dbname = get_database_name(MyDatabaseId);

        /* copy dbname out of TX context */

        dbname = MemoryContextStrdup(cur, dbname);

        CommitTransactionCommand();

    }


    dest = CreateDestReceiver(DestRemoteSimple);


    /* need a tuple descriptor representing four columns */

    tupdesc = CreateTemplateTupleDesc(4);

    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "systemid",

                              TEXTOID, -1, 0);

    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "timeline",

                              INT8OID, -1, 0);

    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "xlogpos",

                              TEXTOID, -1, 0);

    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "dbname",

                              TEXTOID, -1, 0);


    /* prepare for projection of tuples */

    tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);


    /* column 1: system identifier */

    values[0] = CStringGetTextDatum(sysid);


    /* column 2: timeline */

    values[1] = Int64GetDatum(currTLI);


    /* column 3: wal location */

    values[2] = CStringGetTextDatum(xloc);


    /* column 4: database name, or NULL if none */

    if (dbname)

        values[3] = CStringGetTextDatum(dbname);

    else

        nulls[3] = true;


    /* send it to dest */

    do_tup_output(tstate, values, nulls);


    end_tup_output(tstate);

}


/*
 * Handle READ_REPLICATION_SLOT command.
 *
 * Replies with one row of three columns: slot_type (text), restart_lsn
 * (text) and restart_tli (int8).  If the named slot is not found or is not
 * in use, every column is NULL.  A slot with a valid database OID (a
 * logical slot) is rejected with an error.  For a physical slot without a
 * valid restart_lsn, only slot_type is non-NULL.
 */
static void
ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
{
#define READ_REPLICATION_SLOT_COLS 3
    ReplicationSlot *found;
    DestReceiver *receiver;
    TupOutputState *out;
    TupleDesc   rowdesc;
    Datum       row[READ_REPLICATION_SLOT_COLS] = {0};
    bool        isnull[READ_REPLICATION_SLOT_COLS];
    int         col;

    rowdesc = CreateTemplateTupleDesc(READ_REPLICATION_SLOT_COLS);
    TupleDescInitBuiltinEntry(rowdesc, (AttrNumber) 1, "slot_type",
                              TEXTOID, -1, 0);
    TupleDescInitBuiltinEntry(rowdesc, (AttrNumber) 2, "restart_lsn",
                              TEXTOID, -1, 0);
    /* TimeLineID is unsigned, so int4 is not wide enough. */
    TupleDescInitBuiltinEntry(rowdesc, (AttrNumber) 3, "restart_tli",
                              INT8OID, -1, 0);

    /* Every column starts out NULL; known values are filled in below. */
    for (col = 0; col < READ_REPLICATION_SLOT_COLS; col++)
        isnull[col] = true;

    LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
    found = SearchNamedReplicationSlot(cmd->slotname, false);
    if (found != NULL && found->in_use)
    {
        ReplicationSlot snapshot;

        /* Take a private copy of the slot while holding its spinlock. */
        SpinLockAcquire(&found->mutex);
        snapshot = *found;
        SpinLockRelease(&found->mutex);
        LWLockRelease(ReplicationSlotControlLock);

        if (OidIsValid(snapshot.data.database))
            ereport(ERROR,
                    errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                    errmsg("cannot use %s with a logical replication slot",
                           "READ_REPLICATION_SLOT"));

        /* column 1: slot type */
        col = 0;
        row[col] = CStringGetTextDatum("physical");
        isnull[col] = false;
        col++;

        /* columns 2 and 3 are only known when the slot has a restart_lsn */
        if (!XLogRecPtrIsInvalid(snapshot.data.restart_lsn))
        {
            char        lsn_text[64];
            TimeLineID  now_tli;
            TimeLineID  slot_tli;
            List       *history;

            /* column 2: start LSN */
            snprintf(lsn_text, sizeof(lsn_text), "%X/%X",
                     LSN_FORMAT_ARGS(snapshot.data.restart_lsn));
            row[col] = CStringGetTextDatum(lsn_text);
            isnull[col] = false;

            /*
             * column 3: timeline this WAL was produced on.  While in
             * recovery, the history of the currently-replaying timeline is
             * used to locate the LSN; otherwise the WAL insertion timeline.
             */
            if (RecoveryInProgress())
                (void) GetXLogReplayRecPtr(&now_tli);
            else
                now_tli = GetWALInsertionTimeLine();

            history = readTimeLineHistory(now_tli);
            slot_tli = tliOfPointInHistory(snapshot.data.restart_lsn,
                                           history);
            row[col + 1] = Int64GetDatum((int64) slot_tli);
            isnull[col + 1] = false;
        }
        col += 2;

        Assert(col == READ_REPLICATION_SLOT_COLS);
    }
    else
    {
        /* No usable slot: just drop the lock and send an all-NULL row. */
        LWLockRelease(ReplicationSlotControlLock);
    }

    receiver = CreateDestReceiver(DestRemoteSimple);
    out = begin_tup_output_tupdesc(receiver, rowdesc, &TTSOpsVirtual);
    do_tup_output(out, row, isnull);
    end_tup_output(out);
}


/*
 * Handle TIMELINE_HISTORY command.
 *
 * Replies with a result set of one row and two text columns: "filename"
 * (the history file's name) and "content" (the raw bytes of that file for
 * cmd->timeline).
 *
 * Raises an ERROR if the history file cannot be opened, seeked, read or
 * closed, or if it turns out to be shorter than the length measured up
 * front.
 */
static void
SendTimeLineHistory(TimeLineHistoryCmd *cmd)
{
    DestReceiver *dest;
    TupleDesc   tupdesc;
    StringInfoData buf;
    char        histfname[MAXFNAMELEN];
    char        path[MAXPGPATH];
    int         fd;
    off_t       histfilelen;
    off_t       bytesleft;
    Size        len;

    dest = CreateDestReceiver(DestRemoteSimple);

    /*
     * Reply with a result set with one row, and two columns. The first col is
     * the name of the history file, 2nd is the contents.
     */
    tupdesc = CreateTemplateTupleDesc(2);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "filename", TEXTOID, -1, 0);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "content", TEXTOID, -1, 0);

    TLHistoryFileName(histfname, cmd->timeline);
    TLHistoryFilePath(path, cmd->timeline);

    /* Send a RowDescription message */
    dest->rStartup(dest, CMD_SELECT, tupdesc);

    /* Send a DataRow message */
    pq_beginmessage(&buf, PqMsg_DataRow);
    pq_sendint16(&buf, 2);      /* # of columns */
    len = strlen(histfname);
    pq_sendint32(&buf, len);    /* col1 len */
    pq_sendbytes(&buf, histfname, len);

    fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
    if (fd < 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not open file \"%s\": %m", path)));

    /* Determine file length and send it to client */
    histfilelen = lseek(fd, 0, SEEK_END);
    if (histfilelen < 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not seek to end of file \"%s\": %m", path)));
    if (lseek(fd, 0, SEEK_SET) != 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not seek to beginning of file \"%s\": %m", path)));

    pq_sendint32(&buf, histfilelen);    /* col2 len */

    bytesleft = histfilelen;
    while (bytesleft > 0)
    {
        PGAlignedBlock rbuf;
        Size        toread;
        int         nread;

        /*
         * The column length has already been written into the message, so
         * never request more than what is left of it.  Otherwise a file that
         * is longer than measured above would make us append more bytes than
         * advertised and corrupt the DataRow framing.
         */
        toread = sizeof(rbuf);
        if ((off_t) toread > bytesleft)
            toread = (Size) bytesleft;

        pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
        nread = read(fd, rbuf.data, toread);
        pgstat_report_wait_end();
        if (nread < 0)
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not read file \"%s\": %m",
                            path)));
        else if (nread == 0)
        {
            /* hit EOF before the measured length: the file got shorter */
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
                     errmsg("could not read file \"%s\": read %d of %zu",
                            path, nread, (Size) bytesleft)));
        }

        pq_sendbytes(&buf, rbuf.data, nread);
        bytesleft -= nread;
    }

    if (CloseTransientFile(fd) != 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not close file \"%s\": %m", path)));

    pq_endmessage(&buf);
}


/*
 * Handle UPLOAD_MANIFEST command.
 *
 * Puts the connection into copy-in mode, feeds every received packet to the
 * incremental-backup manifest parser, and on success installs the result as
 * the session's uploaded manifest (uploaded_manifest and
 * uploaded_manifest_mcxt), replacing any earlier one.
 */
static void
UploadManifest(void)
{
    StringInfoData msg;
    off_t       consumed = 0;
    MemoryContext manifest_cxt;
    IncrementalBackupInfo *manifest;

    /*
     * Manifest parsing uses the cryptohash code, and that needs a resource
     * owner to be current.
     */
    Assert(AuxProcessResourceOwner != NULL);
    Assert(CurrentResourceOwner == AuxProcessResourceOwner ||
           CurrentResourceOwner == NULL);
    CurrentResourceOwner = AuxProcessResourceOwner;

    /* The incoming manifest is accumulated in a context of its own. */
    manifest_cxt = AllocSetContextCreate(CurrentMemoryContext,
                                         "incremental backup information",
                                         ALLOCSET_DEFAULT_SIZES);
    manifest = CreateIncrementalBackupInfo(manifest_cxt);

    /*
     * Tell the client to start sending: a CopyInResponse carrying a zero
     * byte followed by a zero int16, flushed right away.
     */
    pq_beginmessage(&msg, PqMsg_CopyInResponse);
    pq_sendbyte(&msg, 0);
    pq_sendint16(&msg, 0);
    pq_endmessage_reuse(&msg);
    pq_flush();

    /* Consume client packets until the handler reports completion. */
    for (;;)
    {
        if (!HandleUploadManifestPacket(&msg, &consumed, manifest))
            break;
    }

    /* All data is in; complete the manifest processing. */
    FinalizeIncrementalManifest(manifest);

    /*
     * Swap in the new manifest.  The old context (if any) is deleted and the
     * new one is re-parented under CacheMemoryContext so it is preserved.
     * MemoryContextDelete and MemoryContextSetParent are assumed not to
     * fail, so we should not bail out of this sequence leaving dangling
     * pointers behind.
     */
    if (uploaded_manifest_mcxt != NULL)
    {
        MemoryContextDelete(uploaded_manifest_mcxt);
    }
    MemoryContextSetParent(manifest_cxt, CacheMemoryContext);
    uploaded_manifest = manifest;
    uploaded_manifest_mcxt = manifest_cxt;

    /* Release what was acquired under the resource owner set up above. */
    ReleaseAuxProcessResources(true);
}


/*
 * Process one packet received during the handling of an UPLOAD_MANIFEST
 * operation.
 *
 * 'buf' is scratch space. This function expects it to be initialized, doesn't
 * care what the current contents are, and may override them with completely
 * new contents.
 *
 * 'offset' is accepted but not referenced anywhere in this function's body.
 *
 * 'ib' is the incremental-backup state that CopyData payloads are appended
 * to.
 *
 * The return value is true if the caller should continue processing
 * additional packets and false if the UPLOAD_MANIFEST operation is complete.
 *
 * Raises an ERROR on EOF from the client, on an unexpected message type, and
 * when the client sends CopyFail.
 */
static bool
HandleUploadManifestPacket(StringInfo buf, off_t *offset,
                           IncrementalBackupInfo *ib)
{
    int         mtype;
    int         maxmsglen;

    /*
     * Cancel interrupts are held off from here until the whole message (type
     * byte plus body) has been read.
     */
    HOLD_CANCEL_INTERRUPTS();

    pq_startmsgread();
    mtype = pq_getbyte();
    if (mtype == EOF)
        ereport(ERROR,
                (errcode(ERRCODE_CONNECTION_FAILURE),
                 errmsg("unexpected EOF on client connection with an open transaction")));

    /* Choose the size limit for the body based on the message type. */
    switch (mtype)
    {
        case 'd':               /* CopyData */
            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
            break;
        case 'c':               /* CopyDone */
        case 'f':               /* CopyFail */
        case 'H':               /* Flush */
        case 'S':               /* Sync */
            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
            break;
        default:
            ereport(ERROR,
                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                     errmsg("unexpected message type 0x%02X during COPY from stdin",
                            mtype)));
            maxmsglen = 0;      /* keep compiler quiet */
            break;
    }

    /* Now collect the message body; a nonzero result is reported as EOF */
    if (pq_getmessage(buf, maxmsglen))
        ereport(ERROR,
                (errcode(ERRCODE_CONNECTION_FAILURE),
                 errmsg("unexpected EOF on client connection with an open transaction")));
    RESUME_CANCEL_INTERRUPTS();

    /* Process the message */
    switch (mtype)
    {
        case 'd':               /* CopyData */
            /* Hand the payload to the manifest parser and keep going. */
            AppendIncrementalManifestData(ib, buf->data, buf->len);
            return true;

        case 'c':               /* CopyDone */
            /* The client has finished sending; the upload is complete. */
            return false;

        case 'H':               /* Flush */
        case 'S':               /* Sync */
            /* Ignore these while receiving COPY data, as we do elsewhere. */
            return true;

        case 'f':               /* CopyFail */
            /* The message body carries the client's failure text. */
            ereport(ERROR,
                    (errcode(ERRCODE_QUERY_CANCELED),
                     errmsg("COPY from stdin failed: %s",
                            pq_getmsgstring(buf))));
    }

    /* Not reached. */
    Assert(false);
    return false;
}


/*
 * Handle START_REPLICATION command (physical replication).
 *
 * cmd carries the optional slot name, the optional timeline (0 means the
 * client did not name one) and the requested start position.
 *
 * Outline of what happens here:
 *   1. Allocate the xlogreader used for physical replication.
 *   2. If a slot name was given, acquire it and reject logical slots.
 *   3. Fetch the flush position and its timeline, then decide whether the
 *      timeline to send is the current one or a historic one.
 *   4. Unless there is nothing to stream, enter COPY mode and run
 *      WalSndLoop(XLogSendPhysical).
 *   5. Release the slot (if any), send a one-row result naming the next
 *      timeline when the streamed timeline was historic, and finish with a
 *      CommandComplete message.
 *
 * Problems are reported with ereport(ERROR).  If got_STOPPING is set once the
 * streaming loop has ended, the process exits through proc_exit(0) instead of
 * returning to the caller.
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
    StringInfoData buf;
    XLogRecPtr  FlushPtr;
    TimeLineID  FlushTLI;

    /* create xlogreader for physical replication */
    xlogreader =
        XLogReaderAllocate(wal_segment_size, NULL,
                           XL_ROUTINE(.segment_open = WalSndSegmentOpen,
                                      .segment_close = wal_segment_close),
                           NULL);

    if (!xlogreader)
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory"),
                 errdetail("Failed while allocating a WAL reading processor.")));

    /*
     * We assume here that we're logging enough information in the WAL for
     * log-shipping, since this is checked in PostmasterMain().
     *
     * NOTE: wal_level can only change at shutdown, so in most cases it is
     * difficult for there to be WAL data that we can still see that was
     * written at wal_level='minimal'.
     */

    if (cmd->slotname)
    {
        ReplicationSlotAcquire(cmd->slotname, true, true);
        /* Only physical slots are acceptable for this command. */
        if (SlotIsLogical(MyReplicationSlot))
            ereport(ERROR,
                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                     errmsg("cannot use a logical replication slot for physical replication")));

        /*
         * We don't need to verify the slot's restart_lsn here; instead we
         * rely on the caller requesting the starting point to use.  If the
         * WAL segment doesn't exist, we'll fail later.
         */
    }

    /*
     * Select the timeline. If it was given explicitly by the client, use
     * that. Otherwise use the timeline returned together with the flush
     * position (FlushTLI).  Which function supplies the flush position
     * depends on whether the server is currently in recovery.
     */
    am_cascading_walsender = RecoveryInProgress();
    if (am_cascading_walsender)
        FlushPtr = GetStandbyFlushRecPtr(&FlushTLI);
    else
        FlushPtr = GetFlushRecPtr(&FlushTLI);

    if (cmd->timeline != 0)
    {
        XLogRecPtr  switchpoint;

        sendTimeLine = cmd->timeline;
        if (sendTimeLine == FlushTLI)
        {
            /* The client asked for the timeline we are currently on. */
            sendTimeLineIsHistoric = false;
            sendTimeLineValidUpto = InvalidXLogRecPtr;
        }
        else
        {
            List       *timeLineHistory;

            sendTimeLineIsHistoric = true;

            /*
             * Check that the timeline the client requested exists, and the
             * requested start location is on that timeline.
             */
            timeLineHistory = readTimeLineHistory(FlushTLI);
            switchpoint = tliSwitchPoint(cmd->timeline, timeLineHistory,
                                         &sendTimeLineNextTLI);
            list_free_deep(timeLineHistory);

            /*
             * Found the requested timeline in the history. Check that
             * requested startpoint is on that timeline in our history.
             *
             * This is quite loose on purpose. We only check that we didn't
             * fork off the requested timeline before the switchpoint. We
             * don't check that we switched *to* it before the requested
             * starting point. This is because the client can legitimately
             * request to start replication from the beginning of the WAL
             * segment that contains switchpoint, but on the new timeline, so
             * that it doesn't end up with a partial segment. If you ask for
             * too old a starting point, you'll get an error later when we
             * fail to find the requested WAL segment in pg_wal.
             *
             * XXX: we could be more strict here and only allow a startpoint
             * that's older than the switchpoint, if it's still in the same
             * WAL segment.
             */
            if (!XLogRecPtrIsInvalid(switchpoint) &&
                switchpoint < cmd->startpoint)
            {
                ereport(ERROR,
                        (errmsg("requested starting point %X/%X on timeline %u is not in this server's history",
                                LSN_FORMAT_ARGS(cmd->startpoint),
                                cmd->timeline),
                         errdetail("This server's history forked from timeline %u at %X/%X.",
                                   cmd->timeline,
                                   LSN_FORMAT_ARGS(switchpoint))));
            }
            /* A historic timeline is only streamed up to its switchpoint. */
            sendTimeLineValidUpto = switchpoint;
        }
    }
    else
    {
        /* No timeline requested: stream the current one. */
        sendTimeLine = FlushTLI;
        sendTimeLineValidUpto = InvalidXLogRecPtr;
        sendTimeLineIsHistoric = false;
    }

    streamingDoneSending = streamingDoneReceiving = false;

    /*
     * If there is nothing to stream, don't even enter COPY mode.  That is
     * the case when the timeline is historic and the requested start point
     * is not before the point up to which that timeline is valid.
     */
    if (!sendTimeLineIsHistoric || cmd->startpoint < sendTimeLineValidUpto)
    {
        /*
         * When we first start replication the standby will be behind the
         * primary. For some applications, for example synchronous
         * replication, it is important to have a clear state for this initial
         * catchup mode, so we can trigger actions when we change streaming
         * state later. We may stay in this state for a long time, which is
         * exactly why we want to be able to monitor whether or not we are
         * still here.
         */
        WalSndSetState(WALSNDSTATE_CATCHUP);

        /* Send a CopyBothResponse message, and start streaming */
        pq_beginmessage(&buf, PqMsg_CopyBothResponse);
        pq_sendbyte(&buf, 0);
        pq_sendint16(&buf, 0);
        pq_endmessage(&buf);
        pq_flush();

        /*
         * Don't allow a request to stream from a future point in WAL that
         * hasn't been flushed to disk in this server yet.
         *
         * NOTE(review): this check runs after the CopyBothResponse above has
         * already been sent and flushed, so the error is raised once the
         * client has been told COPY mode started.
         */
        if (FlushPtr < cmd->startpoint)
        {
            ereport(ERROR,
                    (errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
                            LSN_FORMAT_ARGS(cmd->startpoint),
                            LSN_FORMAT_ARGS(FlushPtr))));
        }

        /* Start streaming from the requested point */
        sentPtr = cmd->startpoint;

        /* Initialize shared memory status, too */
        SpinLockAcquire(&MyWalSnd->mutex);
        MyWalSnd->sentPtr = sentPtr;
        SpinLockRelease(&MyWalSnd->mutex);

        SyncRepInitConfig();

        /* Main loop of walsender */
        replication_active = true;

        WalSndLoop(XLogSendPhysical);

        replication_active = false;
        /* A pending stop request ends the process here rather than returning. */
        if (got_STOPPING)
            proc_exit(0);
        WalSndSetState(WALSNDSTATE_STARTUP);

        Assert(streamingDoneSending && streamingDoneReceiving);
    }

    if (cmd->slotname)
        ReplicationSlotRelease();

    /*
     * Copy is finished now. Send a single-row result set indicating the next
     * timeline.
     */
    if (sendTimeLineIsHistoric)
    {
        /* two hex fields of up to 8 digits, the '/' separator and the NUL */
        char        startpos_str[8 + 1 + 8 + 1];
        DestReceiver *dest;
        TupOutputState *tstate;
        TupleDesc   tupdesc;
        Datum       values[2];
        bool        nulls[2] = {0};

        snprintf(startpos_str, sizeof(startpos_str), "%X/%X",
                 LSN_FORMAT_ARGS(sendTimeLineValidUpto));

        dest = CreateDestReceiver(DestRemoteSimple);

        /*
         * Need a tuple descriptor representing two columns. int8 may seem
         * like a surprising data type for this, but in theory int4 would not
         * be wide enough for this, as TimeLineID is unsigned.
         */
        tupdesc = CreateTemplateTupleDesc(2);
        TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "next_tli",
                                  INT8OID, -1, 0);
        TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "next_tli_startpos",
                                  TEXTOID, -1, 0);

        /* prepare for projection of tuple */
        tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);

        values[0] = Int64GetDatum((int64) sendTimeLineNextTLI);
        values[1] = CStringGetTextDatum(startpos_str);

        /* send it to dest */
        do_tup_output(tstate, values, nulls);

        end_tup_output(tstate);
    }

    /* Send CommandComplete message */
    EndReplicationCommand("START_STREAMING");
}


/*
 * XLogReaderRoutine->page_read callback for logical decoding contexts, as a
 * walsender process.
 *
 * Inside the walsender we can do better than read_local_xlog_page,
 * which has to do a plain sleep/busy loop, because the walsender's latch gets
 * set every time WAL is flushed.
 *
 * Waits until WAL up to targetPagePtr + reqLen is available, refreshes the
 * walsender's timeline bookkeeping (sendTimeLine and friends) from the reader
 * state, and reads up to one XLOG_BLCKSZ page starting at targetPagePtr into
 * cur_page.
 *
 * Returns the number of bytes read, or -1 if the wait ended without the
 * requested amount of WAL being available.  targetRecPtr is not used in this
 * function body.
 */
static int
logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
                       XLogRecPtr targetRecPtr, char *cur_page)
{
    XLogRecPtr  flushptr;
    int         count;
    WALReadError errinfo;
    XLogSegNo   segno;
    TimeLineID  currTLI;

    /*
     * Make sure we have enough WAL available before retrieving the current
     * timeline.
     */
    flushptr = WalSndWaitForWal(targetPagePtr + reqLen);

    /* Fail if not enough (implies we are going to shut down) */
    if (flushptr < targetPagePtr + reqLen)
        return -1;

    /*
     * Since logical decoding is also permitted on a standby server, we need
     * to check if the server is in recovery to decide how to get the current
     * timeline ID (so that it also covers the promotion or timeline change
     * cases). We must determine am_cascading_walsender after waiting for the
     * required WAL so that it is correct when the walsender wakes up after a
     * promotion.
     */
    am_cascading_walsender = RecoveryInProgress();

    /* Only the timeline output is wanted from GetXLogReplayRecPtr here. */
    if (am_cascading_walsender)
        GetXLogReplayRecPtr(&currTLI);
    else
        currTLI = GetWALInsertionTimeLine();

    /* Let the reader work out which timeline this page belongs to ... */
    XLogReadDetermineTimeline(state, targetPagePtr, reqLen, currTLI);
    /* ... and mirror its answer into the walsender's timeline variables. */
    sendTimeLineIsHistoric = (state->currTLI != currTLI);
    sendTimeLine = state->currTLI;
    sendTimeLineValidUpto = state->currTLIValidUntil;
    sendTimeLineNextTLI = state->nextTLI;

    if (targetPagePtr + XLOG_BLCKSZ <= flushptr)
        count = XLOG_BLCKSZ;    /* at least one full block is available */
    else
        count = flushptr - targetPagePtr;   /* part of the page available */

    /* now actually read the data, we know it's there */
    if (!WALRead(state,
                 cur_page,
                 targetPagePtr,
                 count,
                 currTLI,       /* Pass the current TLI because only
                                 * WalSndSegmentOpen controls whether new TLI
                                 * is needed. */
                 &errinfo))
        WALReadRaiseError(&errinfo);

    /*
     * After reading into the buffer, check that what we read was valid. We do
     * this after reading, because even though the segment was present when we
     * opened it, it might get recycled or removed while we read it. The
     * read() succeeds in that case, but the data we tried to read might
     * already have been overwritten with new WAL records.
     */
    XLByteToSeg(targetPagePtr, segno, state->segcxt.ws_segsize);
    CheckXLogRemoved(segno, state->seg.ws_tli);

    return count;
}


/*
 * Interpret the option list attached to a CREATE_REPLICATION_SLOT command.
 *
 * The recognized option names are "snapshot", "reserve_wal", "two_phase" and
 * "failover".  The parsed value of each option found is stored through the
 * matching output pointer; an output location whose option does not appear
 * is left untouched, so the caller must preset its defaults.
 *
 * An option that is repeated, or that is not accepted for the slot kind in
 * cmd->kind ("reserve_wal" is checked against the physical kind, the other
 * three against the logical kind), raises a syntax error.  An unknown
 * "snapshot" value or an unknown option name raises an error as well.
 */
static void
parseCreateReplSlotOptions(CreateReplicationSlotCmd *cmd,
                           bool *reserve_wal,
                           CRSSnapshotAction *snapshot_action,
                           bool *two_phase, bool *failover)
{
    ListCell   *cell;
    bool        seen_snapshot = false;
    bool        seen_reserve_wal = false;
    bool        seen_two_phase = false;
    bool        seen_failover = false;

    /* Walk the options one by one; each handled option ends in "continue". */
    foreach(cell, cmd->options)
    {
        DefElem    *opt = (DefElem *) lfirst(cell);
        const char *optname = opt->defname;

        if (strcmp(optname, "snapshot") == 0)
        {
            char       *value;

            if (cmd->kind != REPLICATION_KIND_LOGICAL || seen_snapshot)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            value = defGetString(opt);
            seen_snapshot = true;

            /* Map the textual action onto the enum. */
            if (strcmp(value, "export") == 0)
                *snapshot_action = CRS_EXPORT_SNAPSHOT;
            else if (strcmp(value, "nothing") == 0)
                *snapshot_action = CRS_NOEXPORT_SNAPSHOT;
            else if (strcmp(value, "use") == 0)
                *snapshot_action = CRS_USE_SNAPSHOT;
            else
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("unrecognized value for CREATE_REPLICATION_SLOT option \"%s\": \"%s\"",
                                optname, value)));

            continue;
        }

        if (strcmp(optname, "reserve_wal") == 0)
        {
            if (cmd->kind != REPLICATION_KIND_PHYSICAL || seen_reserve_wal)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            seen_reserve_wal = true;
            *reserve_wal = defGetBoolean(opt);
            continue;
        }

        if (strcmp(optname, "two_phase") == 0)
        {
            if (cmd->kind != REPLICATION_KIND_LOGICAL || seen_two_phase)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            seen_two_phase = true;
            *two_phase = defGetBoolean(opt);
            continue;
        }

        if (strcmp(optname, "failover") == 0)
        {
            if (cmd->kind != REPLICATION_KIND_LOGICAL || seen_failover)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            seen_failover = true;
            *failover = defGetBoolean(opt);
            continue;
        }

        /* Nothing above matched this option name. */
        elog(ERROR, "unrecognized option: %s", optname);
    }
}


/*
 * Create a new replication slot.
 *
 * cmd supplies the slot name, kind (physical or logical), the temporary
 * flag, the output plugin and the option list; the options are decoded by
 * parseCreateReplSlotOptions().
 *
 * Physical slots are created directly and, if "reserve_wal" was requested,
 * have WAL reserved right away.  Logical slots additionally build a decoding
 * context, search for the start point, and optionally export or install the
 * resulting snapshot.
 *
 * On success a one-row result (slot_name, consistent_point, snapshot_name,
 * output_plugin) is sent to the client and the slot is released.  No slot
 * may be held on entry (asserted).
 */
static void
CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
{
    const char *snapshot_name = NULL;
    char        xloc[MAXFNAMELEN];
    char       *slot_name;
    bool        reserve_wal = false;
    bool        two_phase = false;
    bool        failover = false;
    CRSSnapshotAction snapshot_action = CRS_EXPORT_SNAPSHOT;
    DestReceiver *dest;
    TupOutputState *tstate;
    TupleDesc   tupdesc;
    Datum       values[4];
    bool        nulls[4] = {0};

    Assert(!MyReplicationSlot);

    /* The locals above hold the defaults for options that are not given. */
    parseCreateReplSlotOptions(cmd, &reserve_wal, &snapshot_action, &two_phase,
                               &failover);

    if (cmd->kind == REPLICATION_KIND_PHYSICAL)
    {
        ReplicationSlotCreate(cmd->slotname, false,
                              cmd->temporary ? RS_TEMPORARY : RS_PERSISTENT,
                              false, false, false);

        if (reserve_wal)
        {
            ReplicationSlotReserveWal();

            ReplicationSlotMarkDirty();

            /* Write this slot to disk if it's a permanent one. */
            if (!cmd->temporary)
                ReplicationSlotSave();
        }
    }
    else
    {
        LogicalDecodingContext *ctx;
        bool        need_full_snapshot = false;

        Assert(cmd->kind == REPLICATION_KIND_LOGICAL);

        CheckLogicalDecodingRequirements();

        /*
         * Initially create persistent slot as ephemeral - that allows us to
         * nicely handle errors during initialization because it'll get
         * dropped if this transaction fails. We'll make it persistent at the
         * end. Temporary slots can be created as temporary from beginning as
         * they get dropped on error as well.
         */
        ReplicationSlotCreate(cmd->slotname, true,
                              cmd->temporary ? RS_TEMPORARY : RS_EPHEMERAL,
                              two_phase, failover, false);

        /*
         * Do options check early so that we can bail before calling the
         * DecodingContextFindStartpoint which can take long time.
         */
        if (snapshot_action == CRS_EXPORT_SNAPSHOT)
        {
            if (IsTransactionBlock())
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must not be called inside a transaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'export')")));

            need_full_snapshot = true;
        }
        else if (snapshot_action == CRS_USE_SNAPSHOT)
        {
            /*
             * Using the snapshot is only accepted inside a transaction block
             * that is REPEATABLE READ, read-only, has not yet set a snapshot,
             * and is not a subtransaction.
             */
            if (!IsTransactionBlock())
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must be called inside a transaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));

            if (XactIsoLevel != XACT_REPEATABLE_READ)
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must be called in REPEATABLE READ isolation mode transaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));
            if (!XactReadOnly)
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must be called in a read-only transaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));

            if (FirstSnapshotSet)
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must be called before any query",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));

            if (IsSubTransaction())
                ereport(ERROR,
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must not be called in a subtransaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));

            need_full_snapshot = true;
        }

        /* Uses the same reader callbacks as streaming logical replication. */
        ctx = CreateInitDecodingContext(cmd->plugin, NIL, need_full_snapshot,
                                        InvalidXLogRecPtr,
                                        XL_ROUTINE(.page_read = logical_read_xlog_page,
                                                   .segment_open = WalSndSegmentOpen,
                                                   .segment_close = wal_segment_close),
                                        WalSndPrepareWrite, WalSndWriteData,
                                        WalSndUpdateProgress);

        /*
         * Signal that we don't need the timeout mechanism. We're just
         * creating the replication slot and don't yet accept feedback
         * messages or send keepalives. As we possibly need to wait for
         * further WAL the walsender would otherwise possibly be killed too
         * soon.
         */
        last_reply_timestamp = 0;

        /* build initial snapshot, might take a while */
        DecodingContextFindStartpoint(ctx);

        /*
         * Export or use the snapshot if we've been asked to do so.
         *
         * NB. We will convert the snapbuild.c kind of snapshot to normal
         * snapshot when doing this.
         */
        if (snapshot_action == CRS_EXPORT_SNAPSHOT)
        {
            snapshot_name = SnapBuildExportSnapshot(ctx->snapshot_builder);
        }
        else if (snapshot_action == CRS_USE_SNAPSHOT)
        {
            Snapshot    snap;

            snap = SnapBuildInitialSnapshot(ctx->snapshot_builder);
            RestoreTransactionSnapshot(snap, MyProc);
        }

        /* don't need the decoding context anymore */
        FreeDecodingContext(ctx);

        /* Non-temporary slots were created ephemeral above; persist them now. */
        if (!cmd->temporary)
            ReplicationSlotPersist();
    }

    /* For both slot kinds the reported LSN is the slot's confirmed_flush. */
    snprintf(xloc, sizeof(xloc), "%X/%X",
             LSN_FORMAT_ARGS(MyReplicationSlot->data.confirmed_flush));

    dest = CreateDestReceiver(DestRemoteSimple);

    /*----------
     * Need a tuple descriptor representing four columns:
     * - first field: the slot name
     * - second field: LSN at which we became consistent
     * - third field: exported snapshot's name
     * - fourth field: output plugin
     */
    tupdesc = CreateTemplateTupleDesc(4);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "slot_name",
                              TEXTOID, -1, 0);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "consistent_point",
                              TEXTOID, -1, 0);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "snapshot_name",
                              TEXTOID, -1, 0);
    TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "output_plugin",
                              TEXTOID, -1, 0);

    /* prepare for projection of tuples */
    tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);

    /* slot_name */
    slot_name = NameStr(MyReplicationSlot->data.name);
    values[0] = CStringGetTextDatum(slot_name);

    /* consistent wal location */
    values[1] = CStringGetTextDatum(xloc);

    /* snapshot name, or NULL if none (only set on the export path above) */
    if (snapshot_name != NULL)
        values[2] = CStringGetTextDatum(snapshot_name);
    else
        nulls[2] = true;

    /* plugin, or NULL if none */
    if (cmd->plugin != NULL)
        values[3] = CStringGetTextDatum(cmd->plugin);
    else
        nulls[3] = true;

    /* send it to dest */
    do_tup_output(tstate, values, nulls);
    end_tup_output(tstate);

    ReplicationSlotRelease();
}


/*
 * Handle DROP_REPLICATION_SLOT: remove the slot named in the command.
 *
 * The second argument handed to ReplicationSlotDrop() is the negation of the
 * command's "wait" flag.
 */
static void
DropReplicationSlot(DropReplicationSlotCmd *cmd)
{
    bool        nowait = !cmd->wait;

    ReplicationSlotDrop(cmd->slotname, nowait);
}


/*
 * Handle ALTER_REPLICATION_SLOT: change properties of an existing slot.
 *
 * The options "failover" and "two_phase" are accepted, each at most once.  A
 * repeated option raises a syntax error and any other option name raises an
 * error.  ReplicationSlotAlter() receives a pointer to the parsed value for
 * every option that was supplied and NULL for every option that was not.
 */
static void
AlterReplicationSlot(AlterReplicationSlotCmd *cmd)
{
    bool        failover_value;
    bool        two_phase_value;
    bool       *failover_arg = NULL;    /* stays NULL unless option is given */
    bool       *two_phase_arg = NULL;   /* stays NULL unless option is given */

    /* Walk the option list */
    foreach_ptr(DefElem, opt, cmd->options)
    {
        const char *optname = opt->defname;

        if (strcmp(optname, "failover") == 0)
        {
            if (failover_arg != NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            failover_value = defGetBoolean(opt);
            failover_arg = &failover_value;
            continue;
        }

        if (strcmp(optname, "two_phase") == 0)
        {
            if (two_phase_arg != NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("conflicting or redundant options")));

            two_phase_value = defGetBoolean(opt);
            two_phase_arg = &two_phase_value;
            continue;
        }

        elog(ERROR, "unrecognized option: %s", optname);
    }

    ReplicationSlotAlter(cmd->slotname, failover_arg, two_phase_arg);
}


/*
 * Load previously initiated logical slot and prepare for sending data (via
 * WalSndLoop).
 *
 * cmd supplies the slot name, the requested start position and the plugin
 * options.  The function acquires the slot, creates the decoding context,
 * switches the connection into COPY mode, runs WalSndLoop(XLogSendLogical),
 * and afterwards frees the context and releases the slot.
 *
 * If got_STOPPING is set once the loop has ended, the process exits through
 * proc_exit(0); otherwise a CommandComplete is sent to leave COPY mode.
 * No slot may be held on entry (asserted).
 */
static void
StartLogicalReplication(StartReplicationCmd *cmd)
{
    StringInfoData buf;
    QueryCompletion qc;

    /* make sure that our requirements are still fulfilled */
    CheckLogicalDecodingRequirements();

    Assert(!MyReplicationSlot);

    ReplicationSlotAcquire(cmd->slotname, true, true);

    /*
     * Force a disconnect, so that the decoding code doesn't need to care
     * about an eventual switch from running in recovery, to running in a
     * normal environment. Client code is expected to handle reconnects.
     *
     * Here this only logs a message and sets got_STOPPING; the flag is
     * tested again after WalSndLoop() returns below.
     */
    if (am_cascading_walsender && !RecoveryInProgress())
    {
        ereport(LOG,
                (errmsg("terminating walsender process after promotion")));
        got_STOPPING = true;
    }

    /*
     * Create our decoding context, making it start at the previously ack'ed
     * position.
     *
     * Do this before sending a CopyBothResponse message, so that any errors
     * are reported early.
     */
    logical_decoding_ctx =
        CreateDecodingContext(cmd->startpoint, cmd->options, false,
                              XL_ROUTINE(.page_read = logical_read_xlog_page,
                                         .segment_open = WalSndSegmentOpen,
                                         .segment_close = wal_segment_close),
                              WalSndPrepareWrite, WalSndWriteData,
                              WalSndUpdateProgress);
    /* The walsender's global reader is the one owned by the context. */
    xlogreader = logical_decoding_ctx->reader;

    WalSndSetState(WALSNDSTATE_CATCHUP);

    /* Send a CopyBothResponse message, and start streaming */
    pq_beginmessage(&buf, PqMsg_CopyBothResponse);
    pq_sendbyte(&buf, 0);
    pq_sendint16(&buf, 0);
    pq_endmessage(&buf);
    pq_flush();

    /* Start reading WAL from the oldest required WAL. */
    XLogBeginRead(logical_decoding_ctx->reader,
                  MyReplicationSlot->data.restart_lsn);

    /*
     * Report the location after which we'll send out further commits as the
     * current sentPtr.
     */
    sentPtr = MyReplicationSlot->data.confirmed_flush;

    /*
     * Also update the sent position status in shared memory.
     *
     * NOTE(review): the shared value is set from restart_lsn while the local
     * sentPtr above is set from confirmed_flush - confirm this difference is
     * intended.
     */
    SpinLockAcquire(&MyWalSnd->mutex);
    MyWalSnd->sentPtr = MyReplicationSlot->data.restart_lsn;
    SpinLockRelease(&MyWalSnd->mutex);

    replication_active = true;

    SyncRepInitConfig();

    /* Main loop of walsender */
    WalSndLoop(XLogSendLogical);

    /* Streaming is over: tear down the context and give the slot back. */
    FreeDecodingContext(logical_decoding_ctx);
    ReplicationSlotRelease();

    replication_active = false;
    if (got_STOPPING)
        proc_exit(0);
    WalSndSetState(WALSNDSTATE_STARTUP);

    /* Get out of COPY mode (CommandComplete). */
    SetQueryCompletion(&qc, CMDTAG_COPY, 0);
    EndCommand(&qc, DestRemote, false);
}


/*
 * LogicalDecodingContext 'prepare_write' callback.
 *
 * Resets ctx->out and writes the message header into it: the type byte 'w'
 * followed by three 64-bit fields (dataStart, walEnd, sendtime).
 *
 * Nothing lasting may be done here, since the prepared data might never be
 * sent.  The xid argument is not used.
 */
static void
WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write)
{
    /*
     * Only the last write carries the real LSN; sending the same LSN several
     * times could confuse sync rep, so earlier writes report an invalid one.
     */
    XLogRecPtr  header_lsn = last_write ? lsn : InvalidXLogRecPtr;

    resetStringInfo(ctx->out);

    pq_sendbyte(ctx->out, 'w');
    pq_sendint64(ctx->out, header_lsn); /* dataStart */
    pq_sendint64(ctx->out, header_lsn); /* walEnd */

    /*
     * The sendtime slot is only reserved here (as zero); it is filled in
     * later, the same way XLogSendPhysical does it.
     */
    pq_sendint64(ctx->out, 0);  /* sendtime */
}


/*
 * LogicalDecodingContext 'write' callback.
 *
 * Sends the message previously assembled in ctx->out (see
 * WalSndPrepareWrite) to the client as a CopyData packet, after patching the
 * reserved sendtime field.  If the output cannot be flushed right away, or
 * the walsender timeout is getting close, control passes to
 * ProcessPendingWrites(), which also handles replies and timeouts.
 *
 * The lsn, xid and last_write arguments are not used here.
 */
static void
WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
                bool last_write)
{
    /* sendtime sits after the type byte and the two 64-bit LSN fields */
    const size_t sendtime_offset = 1 + sizeof(int64) + sizeof(int64);
    TimestampTz send_time;
    TimestampTz fastpath_limit;

    /*
     * The timestamp is taken as late as possible and then copied over the
     * placeholder.  Not pretty, but the wire format is fixed: streaming
     * physical replication has used it for several releases.
     */
    resetStringInfo(&tmpbuf);
    send_time = GetCurrentTimestamp();
    pq_sendint64(&tmpbuf, send_time);
    memcpy(ctx->out->data + sendtime_offset, tmpbuf.data, sizeof(int64));

    /* queue the assembled message as a CopyData packet */
    pq_putmessage_noblock('d', ctx->out->data, ctx->out->len);

    CHECK_FOR_INTERRUPTS();

    /* Attempt a non-blocking flush; a failure ends the walsender */
    if (pq_flush_if_writable() != 0)
        WalSndShutdown();

    /*
     * Fast path: return directly when we are still well within the timeout
     * and everything was flushed.  Otherwise take the slow path.
     */
    fastpath_limit = TimestampTzPlusMilliseconds(last_reply_timestamp,
                                                 wal_sender_timeout / 2);
    if (send_time >= fastpath_limit || pq_is_send_pending())
        ProcessPendingWrites();
}


/*
 * Wait until there is no pending write. Also process replies from the other
 * side and check timeouts during that.
 *
 * Each iteration first handles client input, the timeout check and the
 * keepalive check, and only then tests pq_is_send_pending(); those three
 * steps therefore run at least once even if nothing is pending on entry.
 * While output remains pending, the loop waits on the socket, reloads the
 * configuration if requested, and retries the flush.
 *
 * The process latch is set before returning.
 */
static void
ProcessPendingWrites(void)
{
    for (;;)
    {
        long        sleeptime;

        /* Check for input from the client */
        ProcessRepliesIfAny();

        /* die if timeout was reached */
        WalSndCheckTimeOut();

        /* Send keepalive if the time has come */
        WalSndKeepaliveIfNecessary();

        /* Done as soon as the output buffer has drained. */
        if (!pq_is_send_pending())
            break;

        sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp());

        /* Sleep until something happens or we time out */
        WalSndWait(WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE, sleeptime,
                   WAIT_EVENT_WAL_SENDER_WRITE_DATA);

        /* Clear any already-pending wakeups */
        ResetLatch(MyLatch);

        CHECK_FOR_INTERRUPTS();

        /* Process any requests or signals received recently */
        if (ConfigReloadPending)
        {
            ConfigReloadPending = false;
            ProcessConfigFile(PGC_SIGHUP);
            SyncRepInitConfig();
        }

        /* Try to flush pending output to the client */
        if (pq_flush_if_writable() != 0)
            WalSndShutdown();
    }

    /* reactivate latch so WalSndLoop knows to continue */
    SetLatch(MyLatch);
}


/*
 * LogicalDecodingContext 'update_progress' callback.
 *
 * Three things can happen here:
 *  - at the end of a transaction, record (lsn, now) in the lag tracker, at
 *    most once per WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS (see
 *    XLogSendPhysical for the physical counterpart);
 *  - when a transaction was skipped and synchronous replication is in use,
 *    send a keepalive so that such transactions are not delayed;
 *  - if output is still pending after that keepalive, or if we are in the
 *    middle of a transaction and half of wal_sender_timeout has passed since
 *    the last reply, call ProcessPendingWrites().
 *
 * The xid argument is not used.
 */
#define WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS    1000

static void
WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
                     bool skipped_xact)
{
    static TimestampTz sendTime = 0;    /* time of the last lag-tracker write */
    TimestampTz now = GetCurrentTimestamp();
    bool        end_xact = ctx->end_xact;
    bool        pending_writes = false;

    /*
     * Lag is tracked only for end-of-transaction LSNs, because the downstream
     * gives us no way to get an ack for any other LSN.  The interval check
     * keeps frequent commits from flooding the lag tracker.
     */
    if (end_xact)
    {
        if (TimestampDifferenceExceeds(sendTime, now,
                                       WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS))
        {
            LagTrackerWrite(lsn, now);
            sendTime = now;
        }
    }

    /*
     * A skipped (empty) transaction under synchronous replication gets a
     * keepalive so that it is not held up.
     *
     * sync_standbys_status is read without a lock on purpose: the worst that
     * can happen is one keepalive more than strictly needed.
     */
    if (skipped_xact && SyncRepRequested())
    {
        volatile WalSndCtlData *ctl = (volatile WalSndCtlData *) WalSndCtl;

        if (ctl->sync_standbys_status & SYNC_STANDBY_DEFINED)
        {
            WalSndKeepalive(false, lsn);

            /* Attempt a non-blocking flush; a failure ends the walsender */
            if (pq_flush_if_writable() != 0)
                WalSndShutdown();

            /* Whatever could not be flushed must be dealt with below */
            pending_writes = pq_is_send_pending();
        }
    }

    /*
     * Drain pending output if there is any.  Otherwise, when we are not at a
     * transaction end, check whether a keepalive may be due: this matters
     * for large transactions whose changes are not sent downstream, where
     * the receiver could time out.  At transaction end this is unnecessary
     * because it will be done at a later point in time.
     */
    if (pending_writes)
        ProcessPendingWrites();
    else if (!end_xact)
    {
        TimestampTz keepalive_after;

        keepalive_after = TimestampTzPlusMilliseconds(last_reply_timestamp,
                                                      wal_sender_timeout / 2);
        if (now >= keepalive_after)
            ProcessPendingWrites();
    }
}


/*
 * If the physical slot we currently hold is listed in the
 * synchronized_standby_slots GUC, wake up the logical walsenders that use
 * logical failover slots.
 */
void
PhysicalWakeupLogicalWalSnd(void)
{
    Assert(MyReplicationSlot && SlotIsPhysical(MyReplicationSlot));

    /*
     * On a standby nothing needs to be woken: syncing slots to cascading
     * standbys is not supported, so no walsender can be waiting for standbys
     * to catch up.  Hence the slot list is consulted only outside recovery.
     */
    if (!RecoveryInProgress() &&
        SlotExistsInSyncStandbySlots(NameStr(MyReplicationSlot->data.name)))
        ConditionVariableBroadcast(&WalSndCtl->wal_confirm_rcv_cv);
}


/*
 * Decide whether we must wait for standbys to catch up.
 *
 * Returns true when we are streaming from a logical failover slot and not
 * all standbys have reached flushed_lsn; returns false otherwise.
 *
 * *wait_event receives the matching wait event when the result is true and
 * 0 when it is false.
 */
static bool
NeedToWaitForStandbys(XLogRecPtr flushed_lsn, uint32 *wait_event)
{
    int         elevel;

    /*
     * Once the shutdown signal has arrived, dropped, invalidated or inactive
     * slots are reported as ERROR rather than WARNING, so that the walsender
     * cannot end up waiting indefinitely.
     */
    if (got_STOPPING)
        elevel = ERROR;
    else
        elevel = WARNING;

    /* Only an actively streaming failover slot can require a wait */
    if (!replication_active || !MyReplicationSlot->data.failover)
    {
        *wait_event = 0;
        return false;
    }

    if (StandbySlotsHaveCaughtup(flushed_lsn, elevel))
    {
        *wait_event = 0;
        return false;
    }

    *wait_event = WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION;
    return true;
}


/*
 * Decide whether the caller has to wait before sending WAL up to target_lsn.
 *
 * Returns true if WAL up to target_lsn has not been flushed to disk yet, or
 * if we are streaming from a logical failover slot and not all standbys have
 * caught up to flushed_lsn; returns false otherwise.
 *
 * *wait_event receives the matching wait event when the result is true and
 * 0 when it is false.
 */
static bool
NeedToWaitForWal(XLogRecPtr target_lsn, XLogRecPtr flushed_lsn,
                 uint32 *wait_event)
{
    /* Enough WAL is on disk: the answer depends on the standby slots only */
    if (target_lsn <= flushed_lsn)
        return NeedToWaitForStandbys(flushed_lsn, wait_event);

    /* The requested WAL still has to be flushed to disk */
    *wait_event = WAIT_EVENT_WAL_SENDER_WAIT_FOR_WAL;
    return true;
}


/*

 * Wait till WAL < loc is flushed to disk so it can be safely sent to client.

 *

 * If the walsender holds a logical failover slot, we also wait for all the

 * specified streaming replication standby servers to confirm receipt of WAL

 * up to RecentFlushPtr. It is beneficial to wait here for the confirmation

 * up to RecentFlushPtr rather than waiting before transmitting each change

 * to logical subscribers, which is already covered by RecentFlushPtr.

 *

 * loc is the WAL position the caller wants to be able to read up to.

 *

 * RecentFlushPtr is a function-local static, so the position observed by one

 * call is still available to the fast path of the next call.

 *

 * Returns end LSN of flushed WAL.  Normally this will be >= loc, but if we

 * detect a shutdown request (either from postmaster or client) we will return

 * early, so caller must always check.

 */

static XLogRecPtr

WalSndWaitForWal(XLogRecPtr loc)

{

    int         wakeEvents;

    /* filled in by NeedToWaitForWal() / NeedToWaitForStandbys() */

    uint32      wait_event = 0;

    static XLogRecPtr RecentFlushPtr = InvalidXLogRecPtr;

    /* time of the last IO statistics flush done by this call; 0 = none yet */

    TimestampTz last_flush = 0;


    /*

     * Fast path to avoid acquiring the spinlock in case we already know we

     * have enough WAL available and all the standby servers have confirmed

     * receipt of WAL up to RecentFlushPtr. This is particularly interesting

     * if we're far behind.

     */

    if (!XLogRecPtrIsInvalid(RecentFlushPtr) &&

        !NeedToWaitForWal(loc, RecentFlushPtr, &wait_event))

        return RecentFlushPtr;


    /*

     * Within the loop, we wait for the necessary WALs to be flushed to disk

     * first, followed by waiting for standbys to catch up if there are enough

     * WALs (see NeedToWaitForWal()) or upon receiving the shutdown signal.

     */

    for (;;)

    {

        /* set when stopping and the standbys have not caught up yet */

        bool        wait_for_standby_at_stop = false;

        long        sleeptime;

        TimestampTz now;


        /* Clear any already-pending wakeups */

        ResetLatch(MyLatch);


        CHECK_FOR_INTERRUPTS();


        /* Process any requests or signals received recently */

        if (ConfigReloadPending)

        {

            ConfigReloadPending = false;

            ProcessConfigFile(PGC_SIGHUP);

            SyncRepInitConfig();

        }


        /* Check for input from the client */

        ProcessRepliesIfAny();


        /*

         * If we're shutting down, trigger pending WAL to be written out,

         * otherwise we'd possibly end up waiting for WAL that never gets

         * written, because walwriter has shut down already.

         */

        if (got_STOPPING)

            XLogBackgroundFlush();


        /*

         * To avoid the scenario where standbys need to catch up to a newer

         * WAL location in each iteration, we update our idea of the currently

         * flushed position only if we are not waiting for standbys to catch

         * up.

         *

         * Outside recovery the flush pointer is used; during recovery the

         * replay pointer is used instead.

         */

        if (wait_event != WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION)

        {

            if (!RecoveryInProgress())

                RecentFlushPtr = GetFlushRecPtr(NULL);

            else

                RecentFlushPtr = GetXLogReplayRecPtr(NULL);

        }


        /*

         * If postmaster asked us to stop and the standby slots have caught up

         * to the flushed position, don't wait anymore.

         *

         * It's important to do this check after the recomputation of

         * RecentFlushPtr, so we can send all remaining data before shutting

         * down.

         */

        if (got_STOPPING)

        {

            if (NeedToWaitForStandbys(RecentFlushPtr, &wait_event))

                wait_for_standby_at_stop = true;

            else

                break;

        }


        /*

         * We only send regular messages to the client for full decoded

         * transactions, but a synchronous replication and walsender shutdown

         * possibly are waiting for a later location. So, before sleeping, we

         * send a ping containing the flush location. If the receiver is

         * otherwise idle, this keepalive will trigger a reply. Processing the

         * reply will update these MyWalSnd locations.

         */

        if (MyWalSnd->flush < sentPtr &&

            MyWalSnd->write < sentPtr &&

            !waiting_for_ping_response)

            WalSndKeepalive(false, InvalidXLogRecPtr);


        /*

         * Exit the loop if already caught up and doesn't need to wait for

         * standby slots.

         */

        if (!wait_for_standby_at_stop &&

            !NeedToWaitForWal(loc, RecentFlushPtr, &wait_event))

            break;


        /*

         * Waiting for new WAL or waiting for standbys to catch up. Since we

         * need to wait, we're now caught up.

         */

        WalSndCaughtUp = true;


        /*

         * Try to flush any pending output to the client.

         */

        if (pq_flush_if_writable() != 0)

            WalSndShutdown();


        /*

         * If we have received CopyDone from the client, sent CopyDone

         * ourselves, and the output buffer is empty, it's time to exit

         * streaming, so fail the current WAL fetch request.

         */

        if (streamingDoneReceiving && streamingDoneSending &&

            !pq_is_send_pending())

            break;


        /* die if timeout was reached */

        WalSndCheckTimeOut();


        /* Send keepalive if the time has come */

        WalSndKeepaliveIfNecessary();


        /*

         * Sleep until something happens or we time out.  Also wait for the

         * socket becoming writable, if there's still pending output.

         * Otherwise we might sit on sendable output data while waiting for

         * new WAL to be generated.  (But if we have nothing to send, we don't

         * want to wake on socket-writable.)

         */

        now = GetCurrentTimestamp();

        sleeptime = WalSndComputeSleeptime(now);


        wakeEvents = WL_SOCKET_READABLE;


        if (pq_is_send_pending())

            wakeEvents |= WL_SOCKET_WRITEABLE;


        /*

         * Control only gets here after NeedToWaitForStandbys() or

         * NeedToWaitForWal() returned true, and both store a wait event in

         * wait_event in that case.

         */

        Assert(wait_event != 0);


        /* Report IO statistics, if needed */

        if (TimestampDifferenceExceeds(last_flush, now,

                                       WALSENDER_STATS_FLUSH_INTERVAL))

        {

            pgstat_flush_io(false);

            (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);

            last_flush = now;

        }


        WalSndWait(wakeEvents, sleeptime, wait_event);

    }


    /* reactivate latch so WalSndLoop knows to continue */

    SetLatch(MyLatch);

    return RecentFlushPtr;

}


/*

 * Execute an incoming replication command.

 *

 * cmd_string is the text of the command as received from the client.

 *

 * Returns true if the cmd_string was recognized as WalSender command, false

 * if not.

 *

 * Raises ERROR when the walsender is in stopping mode, when a non-replication

 * command arrives while MyDatabaseId is InvalidOid, when the replication

 * parser fails, when the current transaction block is aborted, or when the

 * parsed node type is not one of the handled commands.

 */

bool

exec_replication_command(const char *cmd_string)

{

    yyscan_t    scanner;

    int         parse_rc;

    Node       *cmd_node;

    const char *cmdtag;

    /* caller's context, restored before returning */

    MemoryContext old_context = CurrentMemoryContext;


    /* We save and re-use the cmd_context across calls */

    static MemoryContext cmd_context = NULL;


    /*

     * If WAL sender has been told that shutdown is getting close, switch its

     * status accordingly to handle the next replication commands correctly.

     */

    if (got_STOPPING)

        WalSndSetState(WALSNDSTATE_STOPPING);


    /*

     * Throw error if in stopping mode.  We need prevent commands that could

     * generate WAL while the shutdown checkpoint is being written.  To be

     * safe, we just prohibit all new commands.

     */

    if (MyWalSnd->state == WALSNDSTATE_STOPPING)

        ereport(ERROR,

                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),

                 errmsg("cannot execute new commands while WAL sender is in stopping mode")));


    /*

     * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next

     * command arrives. Clean up the old stuff if there's anything.

     */

    SnapBuildClearExportedSnapshot();


    CHECK_FOR_INTERRUPTS();


    /*

     * Prepare to parse and execute the command.

     *

     * Because replication command execution can involve beginning or ending

     * transactions, we need a working context that will survive that, so we

     * make it a child of TopMemoryContext.  That in turn creates a hazard of

     * long-lived memory leaks if we lose track of the working context.  We

     * deal with that by creating it only once per walsender, and resetting it

     * for each new command.  (Normally this reset is a no-op, but if the

     * prior exec_replication_command call failed with an error, it won't be.)

     *

     * This is subtler than it looks.  The transactions we manage can extend

     * across replication commands, indeed SnapBuildClearExportedSnapshot

     * might have just ended one.  Because transaction exit will revert to the

     * memory context that was current at transaction start, we need to be

     * sure that that context is still valid.  That motivates re-using the

     * same cmd_context rather than making a new one each time.

     */

    if (cmd_context == NULL)

        cmd_context = AllocSetContextCreate(TopMemoryContext,

                                            "Replication command context",

                                            ALLOCSET_DEFAULT_SIZES);

    else

        MemoryContextReset(cmd_context);


    MemoryContextSwitchTo(cmd_context);


    replication_scanner_init(cmd_string, &scanner);


    /*

     * Is it a WalSender command?

     */

    if (!replication_scanner_is_replication_command(scanner))

    {

        /* Nope; clean up and get out. */

        replication_scanner_finish(scanner);


        MemoryContextSwitchTo(old_context);

        MemoryContextReset(cmd_context);


        /* XXX this is a pretty random place to make this check */

        if (MyDatabaseId == InvalidOid)

            ereport(ERROR,

                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

                     errmsg("cannot execute SQL commands in WAL sender for physical replication")));


        /* Tell the caller that this wasn't a WalSender command. */

        return false;

    }


    /*

     * Looks like a WalSender command, so parse it.

     */

    parse_rc = replication_yyparse(&cmd_node, scanner);

    if (parse_rc != 0)

        ereport(ERROR,

                (errcode(ERRCODE_SYNTAX_ERROR),

                 errmsg_internal("replication command parser returned %d",

                                 parse_rc)));

    replication_scanner_finish(scanner);


    /*

     * Report query to various monitoring facilities.  For this purpose, we

     * report replication commands just like SQL commands.

     */

    debug_query_string = cmd_string;


    pgstat_report_activity(STATE_RUNNING, cmd_string);


    /*

     * Log replication command if log_replication_commands is enabled. Even

     * when it's disabled, log the command with DEBUG1 level for backward

     * compatibility.

     */

    ereport(log_replication_commands ? LOG : DEBUG1,

            (errmsg("received replication command: %s", cmd_string)));


    /*

     * Disallow replication commands in aborted transaction blocks.

     */

    if (IsAbortedTransactionBlockState())

        ereport(ERROR,

                (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),

                 errmsg("current transaction is aborted, "

                        "commands ignored until end of transaction block")));


    CHECK_FOR_INTERRUPTS();


    /*

     * Allocate buffers that will be used for each outgoing and incoming

     * message.  We do this just once per command to reduce palloc overhead.

     */

    initStringInfo(&output_message);

    initStringInfo(&reply_message);

    initStringInfo(&tmpbuf);


    /*

     * Dispatch on the parse node type.  Every case sets the ps display to the

     * command tag, runs the command, and reports completion through

     * EndReplicationCommand().  BASE_BACKUP, START_REPLICATION,

     * TIMELINE_HISTORY and UPLOAD_MANIFEST additionally call

     * PreventInTransactionBlock() before doing any work.

     */

    switch (cmd_node->type)

    {

        case T_IdentifySystemCmd:

            cmdtag = "IDENTIFY_SYSTEM";

            set_ps_display(cmdtag);

            IdentifySystem();

            EndReplicationCommand(cmdtag);

            break;


        case T_ReadReplicationSlotCmd:

            cmdtag = "READ_REPLICATION_SLOT";

            set_ps_display(cmdtag);

            ReadReplicationSlot((ReadReplicationSlotCmd *) cmd_node);

            EndReplicationCommand(cmdtag);

            break;


        case T_BaseBackupCmd:

            cmdtag = "BASE_BACKUP";

            set_ps_display(cmdtag);

            PreventInTransactionBlock(true, cmdtag);

            SendBaseBackup((BaseBackupCmd *) cmd_node, uploaded_manifest);

            EndReplicationCommand(cmdtag);

            break;


        case T_CreateReplicationSlotCmd:

            cmdtag = "CREATE_REPLICATION_SLOT";

            set_ps_display(cmdtag);

            CreateReplicationSlot((CreateReplicationSlotCmd *) cmd_node);

            EndReplicationCommand(cmdtag);

            break;


        case T_DropReplicationSlotCmd:

            cmdtag = "DROP_REPLICATION_SLOT";

            set_ps_display(cmdtag);

            DropReplicationSlot((DropReplicationSlotCmd *) cmd_node);

            EndReplicationCommand(cmdtag);

            break;


        case T_AlterReplicationSlotCmd:

            cmdtag = "ALTER_REPLICATION_SLOT";

            set_ps_display(cmdtag);

            AlterReplicationSlot((AlterReplicationSlotCmd *) cmd_node);

            EndReplicationCommand(cmdtag);

            break;


        case T_StartReplicationCmd:

            {

                StartReplicationCmd *cmd = (StartReplicationCmd *) cmd_node;


                cmdtag = "START_REPLICATION";

                set_ps_display(cmdtag);

                PreventInTransactionBlock(true, cmdtag);


                /* physical and logical streaming have separate entry points */

                if (cmd->kind == REPLICATION_KIND_PHYSICAL)

                    StartReplication(cmd);

                else

                    StartLogicalReplication(cmd);


                /* dupe, but necessary per libpqrcv_endstreaming */

                EndReplicationCommand(cmdtag);


                Assert(xlogreader != NULL);

                break;

            }


        case T_TimeLineHistoryCmd:

            cmdtag = "TIMELINE_HISTORY";

            set_ps_display(cmdtag);

            PreventInTransactionBlock(true, cmdtag);

            SendTimeLineHistory((TimeLineHistoryCmd *) cmd_node);

            EndReplicationCommand(cmdtag);

            break;


        case T_VariableShowStmt:

            {

                DestReceiver *dest = CreateDestReceiver(DestRemoteSimple);

                VariableShowStmt *n = (VariableShowStmt *) cmd_node;


                cmdtag = "SHOW";

                set_ps_display(cmdtag);


                /* syscache access needs a transaction environment */

                StartTransactionCommand();

                GetPGVariable(n->name, dest);

                CommitTransactionCommand();

                EndReplicationCommand(cmdtag);

            }

            break;


        case T_UploadManifestCmd:

            cmdtag = "UPLOAD_MANIFEST";

            set_ps_display(cmdtag);

            PreventInTransactionBlock(true, cmdtag);

            UploadManifest();

            EndReplicationCommand(cmdtag);

            break;


        default:

            elog(ERROR, "unrecognized replication command node tag: %u",

                 cmd_node->type);

    }


    /*

     * Done.  Revert to caller's memory context, and clean out the cmd_context

     * to recover memory right away.

     */

    MemoryContextSwitchTo(old_context);

    MemoryContextReset(cmd_context);


    /*

     * We need not update ps display or pg_stat_activity, because PostgresMain

     * will reset those to "idle".  But we must reset debug_query_string to

     * ensure it doesn't become a dangling pointer.

     */

    debug_query_string = NULL;


    return true;

}


/*
 * Process any incoming messages while streaming. Also checks if the remote
 * end has closed the connection.
 *
 * Messages are consumed until no more input is available without blocking,
 * or until a CopyDone has been received from the frontend.  EOF on the
 * connection, a Terminate message, and a failure to read a message body all
 * end the process through proc_exit(0); an unknown message type is FATAL.
 *
 * If at least one CopyData or CopyDone message was handled,
 * last_reply_timestamp is set to the time this function was entered and
 * waiting_for_ping_response is cleared.
 */
static void
ProcessRepliesIfAny(void)
{
    unsigned char firstchar;
    int         maxmsglen;
    int         r;
    bool        received = false;

    last_processing = GetCurrentTimestamp();

    /*
     * If we already received a CopyDone from the frontend, any subsequent
     * message is the beginning of a new command, and should be processed in
     * the main processing loop.
     */
    while (!streamingDoneReceiving)
    {
        pq_startmsgread();
        r = pq_getbyte_if_available(&firstchar);
        if (r < 0)
        {
            /* unexpected error or EOF */
            ereport(COMMERROR,
                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                     errmsg("unexpected EOF on standby connection")));
            proc_exit(0);
        }
        if (r == 0)
        {
            /* no data available without blocking */
            pq_endmsgread();
            break;
        }

        /* Validate message type and set packet size limit */
        switch (firstchar)
        {
            case PqMsg_CopyData:
                maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
                break;
            case PqMsg_CopyDone:
            case PqMsg_Terminate:
                maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
                break;
            default:
                ereport(FATAL,
                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
                         errmsg("invalid standby message type \"%c\"",
                                firstchar)));
                maxmsglen = 0;  /* keep compiler quiet */
                break;
        }

        /* Read the message contents */
        resetStringInfo(&reply_message);
        if (pq_getmessage(&reply_message, maxmsglen))
        {
            ereport(COMMERROR,
                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                     errmsg("unexpected EOF on standby connection")));
            proc_exit(0);
        }

        /* ... and process it */
        switch (firstchar)
        {
                /*
                 * CopyData carries a standby reply wrapped in a CopyData
                 * packet.
                 */
            case PqMsg_CopyData:
                ProcessStandbyMessage();
                received = true;
                break;

                /*
                 * CopyDone means the standby requested to finish streaming.
                 * Reply with CopyDone, if we had not sent that already.  The
                 * reply uses the same PqMsg_CopyDone constant that is matched
                 * on input, instead of a bare character literal.
                 */
            case PqMsg_CopyDone:
                if (!streamingDoneSending)
                {
                    pq_putmessage_noblock(PqMsg_CopyDone, NULL, 0);
                    streamingDoneSending = true;
                }

                streamingDoneReceiving = true;
                received = true;
                break;

                /*
                 * Terminate means that the standby is closing down the
                 * socket.
                 */
            case PqMsg_Terminate:
                proc_exit(0);

            default:
                Assert(false);  /* NOT REACHED */
        }
    }

    /*
     * Save the last reply timestamp if we've received at least one reply.
     */
    if (received)
    {
        last_reply_timestamp = last_processing;
        waiting_for_ping_response = false;
    }
}


/*
 * Dispatch a status update message received from the standby.
 *
 * The first byte of reply_message selects the handler: 'r' is a standby
 * reply and 'h' is hot standby feedback.  Any other byte is logged as a
 * protocol violation and the process exits.
 */
static void
ProcessStandbyMessage(void)
{
    char        kind;

    /* The leading byte of the message identifies its type */
    kind = pq_getmsgbyte(&reply_message);

    if (kind == 'r')
        ProcessStandbyReplyMessage();
    else if (kind == 'h')
        ProcessStandbyHSFeedbackMessage();
    else
    {
        ereport(COMMERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg("unexpected message type \"%c\"", kind)));
        proc_exit(0);
    }
}


/*
 * Record that a walreceiver has just confirmed receipt of WAL up to `lsn`.
 *
 * The slot's restart_lsn is updated under the slot mutex.  Only when the
 * value actually changed is the slot marked dirty, the required LSN
 * recomputed, and the logical walsenders given a chance to wake up.
 */
static void
PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
{
    ReplicationSlot *slot = MyReplicationSlot;
    bool        moved;

    Assert(lsn != InvalidXLogRecPtr);

    SpinLockAcquire(&slot->mutex);
    moved = (slot->data.restart_lsn != lsn);
    if (moved)
        slot->data.restart_lsn = lsn;
    SpinLockRelease(&slot->mutex);

    /*
     * One could argue that the slot should be saved to disk at this point,
     * but that would be wasted effort: the worst that lost information can
     * do is show wrong data in a statistics view, and we would merely be
     * more conservative about removing files.
     */
    if (!moved)
        return;

    ReplicationSlotMarkDirty();
    ReplicationSlotsComputeRequiredLSN();
    PhysicalWakeupLogicalWalSnd();
}


/*
 * Handle a regular reply from the standby, which reports the WAL locations
 * it has written, flushed and applied.
 */
static void
ProcessStandbyReplyMessage(void)
{
    /* did the previous reply already report everything as applied? */
    static bool applied_all_last_time = false;

    XLogRecPtr  writePtr;
    XLogRecPtr  flushPtr;
    XLogRecPtr  applyPtr;
    TimestampTz replyTime;
    bool        replyRequested;
    TimestampTz now;
    TimeOffset  writeLag;
    TimeOffset  flushLag;
    TimeOffset  applyLag;
    bool        applied_all;
    bool        clearLagTimes;
    WalSnd     *walsnd;

    /* The msgtype byte is already consumed; read the remaining fields */
    writePtr = pq_getmsgint64(&reply_message);
    flushPtr = pq_getmsgint64(&reply_message);
    applyPtr = pq_getmsgint64(&reply_message);
    replyTime = pq_getmsgint64(&reply_message);
    replyRequested = pq_getmsgbyte(&reply_message);

    if (message_level_is_interesting(DEBUG2))
    {
        /* timestamptz_to_str hands back a static buffer, so take a copy */
        char       *replyTimeStr = pstrdup(timestamptz_to_str(replyTime));

        elog(DEBUG2, "write %X/%X flush %X/%X apply %X/%X%s reply_time %s",
             LSN_FORMAT_ARGS(writePtr),
             LSN_FORMAT_ARGS(flushPtr),
             LSN_FORMAT_ARGS(applyPtr),
             replyRequested ? " (reply requested)" : "",
             replyTimeStr);

        pfree(replyTimeStr);
    }

    /* Try to work out the round-trip lag for each reported position */
    now = GetCurrentTimestamp();
    writeLag = LagTrackerRead(SYNC_REP_WAIT_WRITE, writePtr, now);
    flushLag = LagTrackerRead(SYNC_REP_WAIT_FLUSH, flushPtr, now);
    applyLag = LagTrackerRead(SYNC_REP_WAIT_APPLY, applyPtr, now);

    /*
     * Two consecutive replies that both report the WAL as fully replayed
     * mean the second one was caused by wal_receiver_status_interval
     * expiring on the standby.  That is a good moment to drop the lag times
     * measured for the last written/flushed/applied record, so that stale
     * lag data is not displayed until more WAL traffic arrives.
     */
    applied_all = (applyPtr == sentPtr);
    clearLagTimes = applied_all && applied_all_last_time;
    applied_all_last_time = applied_all;

    /* Answer right away if the standby asked for a reply */
    if (replyRequested)
        WalSndKeepalive(false, InvalidXLogRecPtr);

    /*
     * Publish the reported positions and lag values in this walsender's
     * shared state.
     */
    walsnd = MyWalSnd;

    SpinLockAcquire(&walsnd->mutex);
    walsnd->write = writePtr;
    walsnd->flush = flushPtr;
    walsnd->apply = applyPtr;
    if (clearLagTimes || writeLag != -1)
        walsnd->writeLag = writeLag;
    if (clearLagTimes || flushLag != -1)
        walsnd->flushLag = flushLag;
    if (clearLagTimes || applyLag != -1)
        walsnd->applyLag = applyLag;
    walsnd->replyTime = replyTime;
    SpinLockRelease(&walsnd->mutex);

    if (!am_cascading_walsender)
        SyncRepReleaseWaiters();

    /*
     * Advance our local xmin horizon when the client confirmed a flush.
     */
    if (MyReplicationSlot && flushPtr != InvalidXLogRecPtr)
    {
        if (SlotIsLogical(MyReplicationSlot))
            LogicalConfirmReceivedLocation(flushPtr);
        else
            PhysicalConfirmReceivedLocation(flushPtr);
    }
}


/*
 * Install a new xmin horizon in the current replication slot if needed.
 *
 * feedbackXmin and feedbackCatalogXmin are the values to apply to the slot's
 * xmin and catalog_xmin respectively.  Our own MyProc->xmin is cleared in
 * every case.
 */
static void
PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbackCatalogXmin)
{
    ReplicationSlot *slot = MyReplicationSlot;
    bool        xmin_changed = false;
    bool        catalog_xmin_changed = false;

    SpinLockAcquire(&slot->mutex);
    MyProc->xmin = InvalidTransactionId;

    /*
     * Physical replication does not need the interlock that separate xmin
     * and effective_xmin updates provide: a missed increase can only lead to
     * query cancellations.  Both fields are therefore set together.
     */
    if (!TransactionIdIsNormal(slot->data.xmin) ||
        !TransactionIdIsNormal(feedbackXmin) ||
        TransactionIdPrecedes(slot->data.xmin, feedbackXmin))
    {
        slot->data.xmin = feedbackXmin;
        slot->effective_xmin = feedbackXmin;
        xmin_changed = true;
    }

    if (!TransactionIdIsNormal(slot->data.catalog_xmin) ||
        !TransactionIdIsNormal(feedbackCatalogXmin) ||
        TransactionIdPrecedes(slot->data.catalog_xmin, feedbackCatalogXmin))
    {
        slot->data.catalog_xmin = feedbackCatalogXmin;
        slot->effective_catalog_xmin = feedbackCatalogXmin;
        catalog_xmin_changed = true;
    }
    SpinLockRelease(&slot->mutex);

    /* Nothing was touched, so there is nothing to propagate */
    if (!xmin_changed && !catalog_xmin_changed)
        return;

    ReplicationSlotMarkDirty();
    ReplicationSlotsComputeRequiredXmin(false);
}


/*
 * Sanity-check an xmin/epoch pair: it must not lie in the future and must
 * not be so old that it has already wrapped around.
 *
 * The epoch of the next XID has to equal the supplied epoch, or be exactly
 * one greater when the counter has wrapped since.
 *
 * Whether clog still exists for the xid is of no concern here.
 */
static bool
TransactionIdInRecentPast(TransactionId xid, uint32 epoch)
{
    FullTransactionId next_fxid = ReadNextFullTransactionId();
    TransactionId next_xid = XidFromFullTransactionId(next_fxid);
    uint32      next_epoch = EpochFromFullTransactionId(next_fxid);
    uint32      expected_epoch;

    /*
     * An xid numerically above the next XID can only be valid if it belongs
     * to the previous epoch.
     */
    expected_epoch = (xid <= next_xid) ? epoch : epoch + 1;
    if (expected_epoch != next_epoch)
        return false;

    /* epoch OK, but the xid may still have wrapped around */
    return TransactionIdPrecedesOrEquals(xid, next_xid);
}


/*

 * Hot Standby feedback

 */

static void

ProcessStandbyHSFeedbackMessage(void)

{

    TransactionId feedbackXmin;

    uint32      feedbackEpoch;

    TransactionId feedbackCatalogXmin;

    uint32      feedbackCatalogEpoch;

    TimestampTz replyTime;


    /*

     * Decipher the reply message. The caller already consumed the msgtype

     * byte. See XLogWalRcvSendHSFeedback() in walreceiver.c for the creation

     * of this message.

     */

    replyTime = pq_getmsgint64(&reply_message);

    feedbackXmin = pq_getmsgint(&reply_message, 4);

    feedbackEpoch = pq_getmsgint(&reply_message, 4);

    feedbackCatalogXmin = pq_getmsgint(&reply_message, 4);

    feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4);


    if (message_level_is_interesting(DEBUG2))

    {

        char       *replyTimeStr;


        /* Copy because timestamptz_to_str returns a static buffer */

        replyTimeStr = pstrdup(timestamptz_to_str(replyTime));


        elog(DEBUG2, "hot standby feedback xmin %u epoch %u, catalog_xmin %u epoch %u reply_time %s",

             feedbackXmin,

             feedbackEpoch,

             feedbackCatalogXmin,

             feedbackCatalogEpoch,

             replyTimeStr);


        pfree(replyTimeStr);

    }


    /*

     * Update shared state for this WalSender process based on reply data from

     * standby.

     */

    {

        WalSnd     *walsnd = MyWalSnd;


        SpinLockAcquire(&walsnd->mutex);

        walsnd->replyTime = replyTime;

        SpinLockRelease(&walsnd->mutex);

    }


    /*

     * Unset WalSender's xmins if the feedback message values are invalid.

     * This happens when the downstream turned hot_standby_feedback off.

     */

    if (!TransactionIdIsNormal(feedbackXmin)

        && !TransactionIdIsNormal(feedbackCatalogXmin))

    {

        MyProc->xmin = InvalidTransactionId;

        if (MyReplicationSlot != NULL)

            PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin);

        return;

    }


    /*

     * Check that the provided xmin/epoch are sane, that is, not in the future

     * and not so far back as to be already wrapped around.  Ignore if not.

     */

    if (TransactionIdIsNormal(feedbackXmin) &&

        !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch))

        return;


    if (TransactionIdIsNormal(feedbackCatalogXmin) &&

        !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch))

        return;


    /*

     * Set the WalSender's xmin equal to the standby's requested xmin, so that

     * the xmin will be taken into account by GetSnapshotData() /

     * ComputeXidHorizons().  This will hold back the removal of dead rows and

     * thereby prevent the generation of cleanup conflicts on the standby

     * server.

     *

     * There is a small window for a race condition here: although we just

     * checked that feedbackXmin precedes nextXid, the nextXid could have

     * gotten advanced between our fetching it and applying the xmin below,

     * perhaps far enough to make feedbackXmin wrap around.  In that case the

     * xmin we set here would be "in the future" and have no effect.  No point

     * in worrying about this since it's too late to save the desired data

     * anyway.  Assuming that the standby sends us an increasing sequence of

     * xmins, this could only happen during the first reply cycle, else our

     * own xmin would prevent nextXid from advancing so far.

     *

     * We don't bother taking the ProcArrayLock here.  Setting the xmin field

     * is assumed atomic, and there's no real need to prevent concurrent

     * horizon determinations.  (If we're moving our xmin forward, this is

     * obviously safe, and if we're moving it backwards, well, the data is at

     * risk already since a VACUUM could already have determined the horizon.)

     *

     * If we're using a replication slot we reserve the xmin via that,

     * otherwise via the walsender's PGPROC entry. We can only track the

     * catalog xmin separately when using a slot, so we store the least of the

     * two provided when not using a slot.

     *

     * XXX: It might make sense to generalize the ephemeral slot concept and

     * always use the slot mechanism to handle the feedback xmin.

     */

    if (MyReplicationSlot != NULL)  /* XXX: persistency configurable? */

        PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin);

    else

    {

        if (TransactionIdIsNormal(feedbackCatalogXmin)

            && TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin))

            MyProc->xmin = feedbackCatalogXmin;

        else

            MyProc->xmin = feedbackXmin;

    }

}


/*
 * Work out how long the send/receive loops may sleep, in milliseconds.
 *
 * With wal_sender_timeout enabled we must wake up early enough to send a
 * keepalive and to drop the connection once wal_sender_timeout has elapsed.
 * Without it (or before timeout processing has been enabled) a fixed ten
 * seconds is returned.
 */
static long
WalSndComputeSleeptime(TimestampTz now)
{
    TimestampTz deadline;
    int         budget_ms;

    if (wal_sender_timeout <= 0 || last_reply_timestamp <= 0)
        return 10000;           /* 10 s */

    /*
     * While a ping is outstanding, sleep no longer than until
     * wal_sender_timeout is reached.  If no ping has been sent yet, wake up
     * when it is time to send one: WalSndKeepaliveIfNecessary() wants to send
     * a keepalive once half of the timeout has passed without a response.
     */
    if (waiting_for_ping_response)
        budget_ms = wal_sender_timeout;
    else
        budget_ms = wal_sender_timeout / 2;

    deadline = TimestampTzPlusMilliseconds(last_reply_timestamp, budget_ms);

    /* Convert the absolute deadline into a time relative to "now". */
    return TimestampDifferenceMilliseconds(now, deadline);
}


/*
 * Shut down if the client has not responded within wal_sender_timeout.
 *
 * last_processing serves as the reference point so that server-side stalls
 * are not counted against the client.  A long server-side stall can however
 * make WalSndKeepaliveIfNecessary() postdate last_processing by more than
 * wal_sender_timeout, in which case the client has to reply almost
 * immediately to avoid a timeout.  This rarely matters in the default
 * configuration, under which clients spontaneously send a message every
 * standby_message_timeout = wal_sender_timeout/6 = 10s.  The problem could be
 * eliminated by recognizing timeout expiration at wal_sender_timeout/2 after
 * the keepalive.
 */
static void
WalSndCheckTimeOut(void)
{
    TimestampTz deadline;

    /* nothing to do while we're busy with something that has no timeout */
    if (last_reply_timestamp <= 0)
        return;

    /* timeout handling disabled */
    if (wal_sender_timeout <= 0)
        return;

    deadline = TimestampTzPlusMilliseconds(last_reply_timestamp,
                                           wal_sender_timeout);
    if (last_processing < deadline)
        return;

    /*
     * An expired replication timeout typically indicates a communication
     * problem, so the error message is not sent to the standby.
     */
    ereport(COMMERROR,
            (errmsg("terminating walsender process due to replication timeout")));

    WalSndShutdown();
}


/*

 * Main loop of walsender process that streams the WAL over Copy messages.

 *

 * send_data is the callback used to queue more WAL into the output buffer; it

 * is invoked only while no output is pending.  Every iteration processes

 * client replies, tries to send and flush data, checks the replication

 * timeout, sends a keepalive if one is due, and finally sleeps when there is

 * nothing else to do right now.

 *

 * The loop is left once CopyDone has been both received and sent and the

 * output buffer has been drained.

 */

static void

WalSndLoop(WalSndSendDataCallback send_data)

{

    /* timestamp of the most recent IO statistics flush performed below */

    TimestampTz last_flush = 0;


    /*

     * Initialize the last reply timestamp. That enables timeout processing

     * from hereon.

     */

    last_reply_timestamp = GetCurrentTimestamp();

    waiting_for_ping_response = false;


    /*

     * Loop until we reach the end of this timeline or the client requests to

     * stop streaming.

     */

    for (;;)

    {

        /* Clear any already-pending wakeups */

        ResetLatch(MyLatch);


        CHECK_FOR_INTERRUPTS();


        /* Process any requests or signals received recently */

        if (ConfigReloadPending)

        {

            ConfigReloadPending = false;

            ProcessConfigFile(PGC_SIGHUP);

            SyncRepInitConfig();

        }


        /* Check for input from the client */

        ProcessRepliesIfAny();


        /*

         * If we have received CopyDone from the client, sent CopyDone

         * ourselves, and the output buffer is empty, it's time to exit

         * streaming.

         */

        if (streamingDoneReceiving && streamingDoneSending &&

            !pq_is_send_pending())

            break;


        /*

         * If we don't have any pending data in the output buffer, try to send

         * some more.  If there is some, we don't bother to call send_data

         * again until we've flushed it ... but we'd better assume we are not

         * caught up.

         */

        if (!pq_is_send_pending())

            send_data();

        else

            WalSndCaughtUp = false;


        /* Try to flush pending output to the client */

        if (pq_flush_if_writable() != 0)

            WalSndShutdown();


        /* If nothing remains to be sent right now ... */

        if (WalSndCaughtUp && !pq_is_send_pending())

        {

            /*

             * If we're in catchup state, move to streaming.  This is an

             * important state change for users to know about, since before

             * this point data loss might occur if the primary dies and we

             * need to failover to the standby. The state change is also

             * important for synchronous replication, since commits that

             * started to wait at that point might wait for some time.

             */

            if (MyWalSnd->state == WALSNDSTATE_CATCHUP)

            {

                ereport(DEBUG1,

                        (errmsg_internal("\"%s\" has now caught up with upstream server",

                                         application_name)));

                WalSndSetState(WALSNDSTATE_STREAMING);

            }


            /*

             * When SIGUSR2 arrives, we send any outstanding logs up to the

             * shutdown checkpoint record (i.e., the latest record), wait for

             * them to be replicated to the standby, and exit. This may be a

             * normal termination at shutdown, or a promotion, the walsender

             * is not sure which.

             */

            if (got_SIGUSR2)

                WalSndDone(send_data);

        }


        /* Check for replication timeout. */

        WalSndCheckTimeOut();


        /* Send keepalive if the time has come */

        WalSndKeepaliveIfNecessary();


        /*

         * Block if we have unsent data.  XXX For logical replication, let

         * WalSndWaitForWal() handle any other blocking; idle receivers need

         * its additional actions.  For physical replication, also block if

         * caught up; its send_data does not block.

         *

         * The IO statistics are reported in WalSndWaitForWal() for the

         * logical WAL senders.

         */

        if ((WalSndCaughtUp && send_data != XLogSendLogical &&

             !streamingDoneSending) ||

            pq_is_send_pending())

        {

            long        sleeptime;

            int         wakeEvents;

            TimestampTz now;


            /* Only wait for client input while the client may still send */

            if (!streamingDoneReceiving)

                wakeEvents = WL_SOCKET_READABLE;

            else

                wakeEvents = 0;


            /*

             * Use fresh timestamp, not last_processing, to reduce the chance

             * of reaching wal_sender_timeout before sending a keepalive.

             */

            now = GetCurrentTimestamp();

            sleeptime = WalSndComputeSleeptime(now);


            /* Also wake up when the socket can take more pending output */

            if (pq_is_send_pending())

                wakeEvents |= WL_SOCKET_WRITEABLE;


            /* Report IO statistics, if needed */

            if (TimestampDifferenceExceeds(last_flush, now,

                                           WALSENDER_STATS_FLUSH_INTERVAL))

            {

                pgstat_flush_io(false);

                (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);

                last_flush = now;

            }


            /* Sleep until something happens or we time out */

            WalSndWait(wakeEvents, sleeptime, WAIT_EVENT_WAL_SENDER_MAIN);

        }

    }

}


/* Initialize a per-walsender data structure for this walsender process */

static void

InitWalSenderSlot(void)

{

    int         i;


    /*

     * WalSndCtl should be set up already (we inherit this by fork() or

     * EXEC_BACKEND mechanism from the postmaster).

     */

    Assert(WalSndCtl != NULL);

    Assert(MyWalSnd == NULL);


    /*

     * Find a free walsender slot and reserve it. This must not fail due to

     * the prior check for free WAL senders in InitProcess().

     */

    for (i = 0; i < max_wal_senders; i++)

    {

        WalSnd     *walsnd = &WalSndCtl->walsnds[i];


        SpinLockAcquire(&walsnd->mutex);


        if (walsnd->pid != 0)

        {

            SpinLockRelease(&walsnd->mutex);

            continue;

        }

        else

        {

            /*

             * Found a free slot. Reserve it for us.

             */

            walsnd->pid = MyProcPid;

            walsnd->state = WALSNDSTATE_STARTUP;

            walsnd->sentPtr = InvalidXLogRecPtr;

            walsnd->needreload = false;

            walsnd->write = InvalidXLogRecPtr;

            walsnd->flush = InvalidXLogRecPtr;

            walsnd->apply = InvalidXLogRecPtr;

            walsnd->writeLag = -1;

            walsnd->flushLag = -1;

            walsnd->applyLag = -1;

            walsnd->sync_standby_priority = 0;

            walsnd->replyTime = 0;


            /*

             * The kind assignment is done here and not in StartReplication()

             * and StartLogicalReplication(). Indeed, the logical walsender

             * needs to read WAL records (like snapshot of running

             * transactions) during the slot creation. So it needs to be woken

             * up based on its kind.

             *

             * The kind assignment could also be done in StartReplication(),

             * StartLogicalReplication() and CREATE_REPLICATION_SLOT but it

             * seems better to set it on one place.

             */

            if (MyDatabaseId == InvalidOid)

                walsnd->kind = REPLICATION_KIND_PHYSICAL;

            else

                walsnd->kind = REPLICATION_KIND_LOGICAL;


            SpinLockRelease(&walsnd->mutex);

            /* don't need the lock anymore */

            MyWalSnd = (WalSnd *) walsnd;


            break;

        }

    }


    Assert(MyWalSnd != NULL);


    /* Arrange to clean up at walsender exit */

    on_shmem_exit(WalSndKill, 0);

}


/*
 * Exit callback: release the per-walsender shared-memory entry held by this
 * process.  Registered through on_shmem_exit() by InitWalSenderSlot().
 */
static void
WalSndKill(int code, Datum arg)
{
    WalSnd     *released = MyWalSnd;

    Assert(released != NULL);

    /* Drop our own reference before handing the entry back. */
    MyWalSnd = NULL;

    /* A zero pid marks the WalSnd struct as no longer being in use. */
    SpinLockAcquire(&released->mutex);
    released->pid = 0;
    SpinLockRelease(&released->mutex);
}


/*
 * XLogReaderRoutine->segment_open callback.
 *
 * Opens WAL segment nextSegNo for reading, storing the file descriptor in
 * state->seg.ws_file and the timeline the file was taken from in *tli_p.
 * Raises an ERROR if the file cannot be opened.
 */
static void
WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
                  TimeLineID *tli_p)
{
    char        segpath[MAXPGPATH];
    bool        inSwitchSegment = false;

    /*-------
     * While reading a historic timeline, a segment that contains the switch
     * to the next timeline is read from the file belonging to that next
     * timeline.
     *
     * Example: this server is currently on timeline 5 and we are streaming
     * timeline 4.  The switch from timeline 4 to 5 happened at 0/13002088,
     * and pg_wal contains:
     *
     * ...
     * 000000040000000000000012
     * 000000040000000000000013
     * 000000050000000000000013
     * 000000050000000000000014
     * ...
     *
     * A request for segment 0x13 on timeline 4 is then served from file
     * 000000050000000000000013.  Archive recovery prefers files from newer
     * timelines, so if the segment was restored from the archive on this
     * server, the old timeline's file, 000000040000000000000013, might not
     * exist.  Both files have equal contents up to the switchpoint, because
     * the used portion of the old segment is copied to the new file at a
     * timeline switch.
     */
    if (sendTimeLineIsHistoric)
    {
        XLogSegNo   switchSegNo;

        XLByteToSeg(sendTimeLineValidUpto, switchSegNo, state->segcxt.ws_segsize);
        inSwitchSegment = (nextSegNo == switchSegNo);
    }
    *tli_p = inSwitchSegment ? sendTimeLineNextTLI : sendTimeLine;

    XLogFilePath(segpath, *tli_p, nextSegNo, state->segcxt.ws_segsize);
    state->seg.ws_file = BasicOpenFile(segpath, O_RDONLY | PG_BINARY);
    if (state->seg.ws_file >= 0)
        return;

    if (errno != ENOENT)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not open file \"%s\": %m",
                        segpath)));
    else
    {
        /*
         * A missing file is taken to mean that the standby asked for a WAL
         * segment so old that it has been removed or recycled already.
         */
        char        segname[MAXFNAMELEN];
        int         open_errno = errno;

        XLogFileName(segname, *tli_p, nextSegNo, wal_segment_size);
        /* keep the errno from the failed open for errcode_for_file_access */
        errno = open_errno;
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("requested WAL segment %s has already been removed",
                        segname)));
    }
}


/*

 * Send out the WAL in its normal physical/stored form.

 *

 * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,

 * but not yet sent to the client, and buffer it in the libpq output

 * buffer.

 *

 * If there is no unsent WAL remaining, WalSndCaughtUp is set to true,

 * otherwise WalSndCaughtUp is set to false.

 *

 * On success, sentPtr (and its copy in this walsender's shared-memory entry)

 * is advanced to the end of the data that was queued.  When a historic

 * timeline has been sent up to its switch point, CopyDone is queued instead

 * and streamingDoneSending is set.

 */

static void

XLogSendPhysical(void)

{

    XLogRecPtr  SendRqstPtr;

    XLogRecPtr  startptr;

    XLogRecPtr  endptr;

    Size        nbytes;

    XLogSegNo   segno;

    WALReadError errinfo;

    Size        rbytes;


    /* If requested switch the WAL sender to the stopping state. */

    if (got_STOPPING)

        WalSndSetState(WALSNDSTATE_STOPPING);


    /* CopyDone has been sent already; there is nothing left to stream. */

    if (streamingDoneSending)

    {

        WalSndCaughtUp = true;

        return;

    }


    /* Figure out how far we can safely send the WAL. */

    if (sendTimeLineIsHistoric)

    {

        /*

         * Streaming an old timeline that's in this server's history, but is

         * not the one we're currently inserting or replaying. It can be

         * streamed up to the point where we switched off that timeline.

         */

        SendRqstPtr = sendTimeLineValidUpto;

    }

    else if (am_cascading_walsender)

    {

        TimeLineID  SendRqstTLI;


        /*

         * Streaming the latest timeline on a standby.

         *

         * Attempt to send all WAL that has already been replayed, so that we

         * know it's valid. If we're receiving WAL through streaming

         * replication, it's also OK to send any WAL that has been received

         * but not replayed.

         *

         * The timeline we're recovering from can change, or we can be

         * promoted. In either case, the current timeline becomes historic. We

         * need to detect that so that we don't try to stream past the point

         * where we switched to another timeline. We check for promotion or

         * timeline switch after calculating FlushPtr, to avoid a race

         * condition: if the timeline becomes historic just after we checked

         * that it was still current, it is still OK to stream it up to the

         * FlushPtr that was calculated before it became historic.

         */

        bool        becameHistoric = false;


        SendRqstPtr = GetStandbyFlushRecPtr(&SendRqstTLI);


        if (!RecoveryInProgress())

        {

            /* We have been promoted. */

            SendRqstTLI = GetWALInsertionTimeLine();

            am_cascading_walsender = false;

            becameHistoric = true;

        }

        else

        {

            /*

             * Still a cascading standby. But is the timeline we're sending

             * still the one recovery is recovering from?

             */

            if (sendTimeLine != SendRqstTLI)

                becameHistoric = true;

        }


        if (becameHistoric)

        {

            /*

             * The timeline we were sending has become historic. Read the

             * timeline history file of the new timeline to see where exactly

             * we forked off from the timeline we were sending.

             */

            List       *history;


            history = readTimeLineHistory(SendRqstTLI);

            sendTimeLineValidUpto = tliSwitchPoint(sendTimeLine, history, &sendTimeLineNextTLI);


            Assert(sendTimeLine < sendTimeLineNextTLI);

            list_free_deep(history);


            sendTimeLineIsHistoric = true;


            SendRqstPtr = sendTimeLineValidUpto;

        }

    }

    else

    {

        /*

         * Streaming the current timeline on a primary.

         *

         * Attempt to send all data that's already been written out and

         * fsync'd to disk.  We cannot go further than what's been written out

         * given the current implementation of WALRead().  And in any case

         * it's unsafe to send WAL that is not securely down to disk on the

         * primary: if the primary subsequently crashes and restarts, standbys

         * must not have applied any WAL that got lost on the primary.

         */

        SendRqstPtr = GetFlushRecPtr(NULL);

    }


    /*

     * Record the current system time as an approximation of the time at which

     * this WAL location was written for the purposes of lag tracking.

     *

     * In theory we could make XLogFlush() record a time in shmem whenever WAL

     * is flushed and we could get that time as well as the LSN when we call

     * GetFlushRecPtr() above (and likewise for the cascading standby

     * equivalent), but rather than putting any new code into the hot WAL path

     * it seems good enough to capture the time here.  We should reach this

     * after XLogFlush() runs WalSndWakeupProcessRequests(), and although that

     * may take some time, we read the WAL flush pointer and take the time

     * very close to together here so that we'll get a later position if it is

     * still moving.

     *

     * Because LagTrackerWrite ignores samples when the LSN hasn't advanced,

     * this gives us a cheap approximation for the WAL flush time for this

     * LSN.

     *

     * Note that the LSN is not necessarily the LSN for the data contained in

     * the present message; it's the end of the WAL, which might be further

     * ahead.  All the lag tracking machinery cares about is finding out when

     * that arbitrary LSN is eventually reported as written, flushed and

     * applied, so that it can measure the elapsed time.

     */

    LagTrackerWrite(SendRqstPtr, GetCurrentTimestamp());


    /*

     * If this is a historic timeline and we've reached the point where we

     * forked to the next timeline, stop streaming.

     *

     * Note: We might already have sent WAL > sendTimeLineValidUpto. The

     * startup process will normally replay all WAL that has been received

     * from the primary, before promoting, but if the WAL streaming is

     * terminated at a WAL page boundary, the valid portion of the timeline

     * might end in the middle of a WAL record. We might've already sent the

     * first half of that partial WAL record to the cascading standby, so that

     * sentPtr > sendTimeLineValidUpto. That's OK; the cascading standby can't

     * replay the partial WAL record either, so it can still follow our

     * timeline switch.

     */

    if (sendTimeLineIsHistoric && sendTimeLineValidUpto <= sentPtr)

    {

        /* close the current file. */

        if (xlogreader->seg.ws_file >= 0)

            wal_segment_close(xlogreader);


        /* Send CopyDone */

        pq_putmessage_noblock('c', NULL, 0);

        streamingDoneSending = true;


        WalSndCaughtUp = true;


        elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)",

             LSN_FORMAT_ARGS(sendTimeLineValidUpto),

             LSN_FORMAT_ARGS(sentPtr));

        return;

    }


    /* Do we have any work to do? */

    Assert(sentPtr <= SendRqstPtr);

    if (SendRqstPtr <= sentPtr)

    {

        WalSndCaughtUp = true;

        return;

    }


    /*

     * Figure out how much to send in one message. If there's no more than

     * MAX_SEND_SIZE bytes to send, send everything. Otherwise send

     * MAX_SEND_SIZE bytes, but round back to logfile or page boundary.

     *

     * The rounding is not only for performance reasons. Walreceiver relies on

     * the fact that we never split a WAL record across two messages. Since a

     * long WAL record is split at page boundary into continuation records,

     * page boundary is always a safe cut-off point. We also assume that

     * SendRqstPtr never points to the middle of a WAL record.

     */

    startptr = sentPtr;

    endptr = startptr;

    endptr += MAX_SEND_SIZE;


    /* if we went beyond SendRqstPtr, back off */

    if (SendRqstPtr <= endptr)

    {

        endptr = SendRqstPtr;

        /*

         * On a historic timeline we do not report being caught up here; the

         * end-of-timeline branch above does so once the switch point has

         * been sent.

         */

        if (sendTimeLineIsHistoric)

            WalSndCaughtUp = false;

        else

            WalSndCaughtUp = true;

    }

    else

    {

        /* round down to page boundary. */

        endptr -= (endptr % XLOG_BLCKSZ);

        WalSndCaughtUp = false;

    }


    nbytes = endptr - startptr;

    Assert(nbytes <= MAX_SEND_SIZE);


    /*

     * OK to read and send the slice.

     *

     * The message starts with a 'w' type byte followed by three int64

     * header fields; the WAL data is appended after them.

     */

    resetStringInfo(&output_message);

    pq_sendbyte(&output_message, 'w');


    pq_sendint64(&output_message, startptr);    /* dataStart */

    pq_sendint64(&output_message, SendRqstPtr); /* walEnd */

    pq_sendint64(&output_message, 0);   /* sendtime, filled in last */


    /*

     * Read the log directly into the output buffer to avoid extra memcpy

     * calls.

     */

    enlargeStringInfo(&output_message, nbytes);


retry:

    /* attempt to read WAL from WAL buffers first */

    rbytes = WALReadFromBuffers(&output_message.data[output_message.len],

                                startptr, nbytes, xlogreader->seg.ws_tli);

    /* account for whatever the WAL buffers supplied */

    output_message.len += rbytes;

    startptr += rbytes;

    nbytes -= rbytes;


    /* now read the remaining WAL from WAL file */

    if (nbytes > 0 &&

        !WALRead(xlogreader,

                 &output_message.data[output_message.len],

                 startptr,

                 nbytes,

                 xlogreader->seg.ws_tli,    /* Pass the current TLI because

                                             * only WalSndSegmentOpen controls

                                             * whether new TLI is needed. */

                 &errinfo))

        WALReadRaiseError(&errinfo);


    /* See logical_read_xlog_page(). */

    XLByteToSeg(startptr, segno, xlogreader->segcxt.ws_segsize);

    CheckXLogRemoved(segno, xlogreader->seg.ws_tli);


    /*

     * During recovery, the currently-open WAL file might be replaced with the

     * file of the same name retrieved from archive. So we always need to

     * check what we read was valid after reading into the buffer. If it's

     * invalid, we try to open and read the file again.

     */

    if (am_cascading_walsender)

    {

        WalSnd     *walsnd = MyWalSnd;

        bool        reload;


        /* fetch and clear the reload request under the spinlock */

        SpinLockAcquire(&walsnd->mutex);

        reload = walsnd->needreload;

        walsnd->needreload = false;

        SpinLockRelease(&walsnd->mutex);


        if (reload && xlogreader->seg.ws_file >= 0)

        {

            wal_segment_close(xlogreader);


            goto retry;

        }

    }


    /* the file read above stored nbytes directly into the buffer */

    output_message.len += nbytes;

    output_message.data[output_message.len] = '\0';


    /*

     * Fill the send timestamp last, so that it is taken as late as possible.

     * It overwrites the placeholder that follows the type byte and the two

     * preceding int64 fields (dataStart, walEnd).

     */

    resetStringInfo(&tmpbuf);

    pq_sendint64(&tmpbuf, GetCurrentTimestamp());

    memcpy(&output_message.data[1 + sizeof(int64) + sizeof(int64)],

           tmpbuf.data, sizeof(int64));


    pq_putmessage_noblock('d', output_message.data, output_message.len);


    sentPtr = endptr;


    /* Update shared memory status */

    {

        WalSnd     *walsnd = MyWalSnd;


        SpinLockAcquire(&walsnd->mutex);

        walsnd->sentPtr = sentPtr;

        SpinLockRelease(&walsnd->mutex);

    }


    /* Report progress of XLOG streaming in PS display */

    if (update_process_title)

    {

        char        activitymsg[50];


        snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",

                 LSN_FORMAT_ARGS(sentPtr));

        set_ps_display(activitymsg);

    }

}


/*

 * Stream out logically decoded data.

 */

static void

XLogSendLogical(void)

{

    XLogRecord *record;

    char       *errm;


    /*

     * We'll use the current flush point to determine whether we've caught up.

     * This variable is static in order to cache it across calls.  Caching is

     * helpful because GetFlushRecPtr() needs to acquire a heavily-contended

     * spinlock.

     */

    static XLogRecPtr flushPtr = InvalidXLogRecPtr;


    /*

     * Don't know whether we've caught up yet. We'll set WalSndCaughtUp to

     * true in WalSndWaitForWal, if we're actually waiting. We also set to

     * true if XLogReadRecord() had to stop reading but WalSndWaitForWal

     * didn't wait - i.e. when we're shutting down.

     */

    WalSndCaughtUp = false;


    record = XLogReadRecord(logical_decoding_ctx->reader, &errm);


    /* xlog record was invalid */

    if (errm != NULL)

        elog(ERROR, "could not find record while sending logically-decoded data: %s",

             errm);


    if (record != NULL)

    {

        /*

         * Note the lack of any call to LagTrackerWrite() which is handled by

         * WalSndUpdateProgress which is called by output plugin through

         * logical decoding write api.

         */

        LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);


        sentPtr = logical_decoding_ctx->reader->EndRecPtr;

    }


    /*

     * If first time through in this session, initialize flushPtr.  Otherwise,

     * we only need to update flushPtr if EndRecPtr is past it.

     */

    if (flushPtr == InvalidXLogRecPtr ||

        logical_decoding_ctx->reader->EndRecPtr >= flushPtr)

    {

        if (am_cascading_walsender)

            flushPtr = GetStandbyFlushRecPtr(NULL);

        else

            flushPtr = GetFlushRecPtr(NULL);

    }


    /* If EndRecPtr is still past our flushPtr, it means we caught up. */

    if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)

        WalSndCaughtUp = true;


    /*

     * If we're caught up and have been requested to stop, have WalSndLoop()

     * terminate the connection in an orderly manner, after writing out all

     * the pending data.

     */

    if (WalSndCaughtUp && got_STOPPING)

        got_SIGUSR2 = true;


    /* Update shared memory status */

    {

        WalSnd     *walsnd = MyWalSnd;


        SpinLockAcquire(&walsnd->mutex);

        walsnd->sentPtr = sentPtr;

        SpinLockRelease(&walsnd->mutex);

    }

}


/*

 * Shutdown if the sender is caught up.

 *

 * NB: This should only be called when the shutdown signal has been received

 * from postmaster.

 *

 * Note that if we determine that there's still more data to send, this

 * function will return control to the caller.

 */

static void
WalSndDone(WalSndSendDataCallback send_data)
{
    XLogRecPtr  replicatedPtr;
    QueryCompletion qc;
    bool        all_replicated;

    /* Run one more send cycle so the caught-up test below is current. */
    send_data();

    /*
     * Choose the position that tells us how far the receiver has gotten: its
     * flush location when it reports a valid one, its write location
     * otherwise.  Tools like pg_receivewal usually report an invalid flush
     * location unless they run in synchronous mode.
     */
    if (XLogRecPtrIsInvalid(MyWalSnd->flush))
        replicatedPtr = MyWalSnd->write;
    else
        replicatedPtr = MyWalSnd->flush;

    all_replicated = WalSndCaughtUp &&
        sentPtr == replicatedPtr &&
        !pq_is_send_pending();

    if (!all_replicated)
    {
        /*
         * Still more to send or to be confirmed: request a reply, unless one
         * is already outstanding, and hand control back to the caller.
         */
        if (!waiting_for_ping_response)
            WalSndKeepalive(true, InvalidXLogRecPtr);
        return;
    }

    /* Everything is replicated: tell the standby that streaming is done. */
    SetQueryCompletion(&qc, CMDTAG_COPY, 0);
    EndCommand(&qc, DestRemote, false);
    pq_flush();

    proc_exit(0);
}


/*

 * Returns the latest point in WAL that has been safely flushed to disk.

 * This should only be called when in recovery.

 *

 * This is called either by cascading walsender to find WAL position to be sent

 * to a cascaded standby or by slot synchronization operation to validate remote

 * slot's lsn before syncing it locally.

 *

 * As a side-effect, *tli is updated to the TLI of the last

 * replayed WAL record.

 */

XLogRecPtr
GetStandbyFlushRecPtr(TimeLineID *tli)
{
    XLogRecPtr  receivedUpTo;
    TimeLineID  receivedTLI;
    XLogRecPtr  replayedUpTo;
    TimeLineID  replayedTLI;

    Assert(am_cascading_walsender || IsSyncingReplicationSlots());

    /*
     * Anything already replayed is safe to send.  In addition, when the
     * walreceiver is streaming from the same timeline as the one being
     * replayed, WAL it has streamed but that is not yet replayed can be
     * sent as well.
     */
    receivedUpTo = GetWalRcvFlushRecPtr(NULL, &receivedTLI);
    replayedUpTo = GetXLogReplayRecPtr(&replayedTLI);

    /* Report the replay timeline to callers that asked for it. */
    if (tli != NULL)
        *tli = replayedTLI;

    if (receivedTLI == replayedTLI && receivedUpTo > replayedUpTo)
        return receivedUpTo;

    return replayedUpTo;
}


/*

 * Request walsenders to reload the currently-open WAL file

 */

void
WalSndRqstFileReload(void)
{
    for (int slot = 0; slot < max_wal_senders; slot++)
    {
        WalSnd     *walsnd = &WalSndCtl->walsnds[slot];

        /* Only slots with a nonzero pid get the reload flag. */
        SpinLockAcquire(&walsnd->mutex);
        if (walsnd->pid != 0)
            walsnd->needreload = true;
        SpinLockRelease(&walsnd->mutex);
    }
}


/*

 * Handle PROCSIG_WALSND_INIT_STOPPING signal.

 */

void
HandleWalSndInitStopping(void)
{
    Assert(am_walsender);

    /*
     * With replication active we only raise a flag; the main loop is
     * expected to send any outstanding WAL, wait for it to be replicated to
     * the standby, and then exit gracefully.
     */
    if (replication_active)
    {
        got_STOPPING = true;
        return;
    }

    /* Replication has not started yet: die as if we had received SIGTERM. */
    kill(MyProcPid, SIGTERM);
}


/*

 * SIGUSR2: set flag to do a last cycle and shut down afterwards. The WAL

 * sender should already have been switched to WALSNDSTATE_STOPPING at

 * this point.

 */

static void
WalSndLastCycleHandler(SIGNAL_ARGS)
{
    /* Record the request first, so the flag is set before any wakeup. */
    got_SIGUSR2 = true;
    /* Set our latch -- presumably so a waiting main loop notices promptly. */
    SetLatch(MyLatch);
}


/* Set up signal handlers */

void
WalSndSignals(void)
{
    /* Install the handlers a walsender process uses */
    pqsignal(SIGHUP, SignalHandlerForConfigReload);
    pqsignal(SIGINT, StatementCancelHandler);   /* query cancel */
    pqsignal(SIGTERM, die);     /* request shutdown */
    /* SIGQUIT handler was already set up by InitPostmasterChild */
    InitializeTimeouts();       /* establishes SIGALRM handler */
    pqsignal(SIGPIPE, SIG_IGN); /* SIGPIPE is ignored */
    pqsignal(SIGUSR1, procsignal_sigusr1_handler);
    pqsignal(SIGUSR2, WalSndLastCycleHandler);  /* request a last cycle and
                                                 * shutdown */

    /* Reset some signals that are accepted by postmaster but not here */
    pqsignal(SIGCHLD, SIG_DFL);
}


/* Report shared-memory space needed by WalSndShmemInit */

Size
WalSndShmemSize(void)
{
    /* Fixed header up to the walsnds array, plus one WalSnd per sender. */
    Size        header_bytes = offsetof(WalSndCtlData, walsnds);
    Size        array_bytes = mul_size(max_wal_senders, sizeof(WalSnd));

    return add_size(header_bytes, array_bytes);
}


/* Allocate and initialize walsender-related shared memory */

void
WalSndShmemInit(void)
{
    bool        found;

    WalSndCtl = (WalSndCtlData *)
        ShmemInitStruct("Wal Sender Ctl", WalSndShmemSize(), &found);

    /* The structure already existed, so there is nothing to set up. */
    if (found)
        return;

    /* First time through: start from an all-zeroes structure. */
    MemSet(WalSndCtl, 0, WalSndShmemSize());

    /* One sync-rep wait queue per wait mode. */
    for (int mode = 0; mode < NUM_SYNC_REP_WAIT_MODE; mode++)
        dlist_init(&(WalSndCtl->SyncRepQueue[mode]));

    /* Each walsender slot carries its own spinlock. */
    for (int slot = 0; slot < max_wal_senders; slot++)
    {
        WalSnd     *walsnd = &WalSndCtl->walsnds[slot];

        SpinLockInit(&walsnd->mutex);
    }

    ConditionVariableInit(&WalSndCtl->wal_flush_cv);
    ConditionVariableInit(&WalSndCtl->wal_replay_cv);
    ConditionVariableInit(&WalSndCtl->wal_confirm_rcv_cv);
}


/*

 * Wake up physical, logical or both kinds of walsenders

 *

 * The distinction between physical and logical walsenders is done, because:

 * - physical walsenders can't send data until it's been flushed

 * - logical walsenders on standby can't decode and send data until it's been

 *   applied

 *

 * For cascading replication we need to wake up physical walsenders separately

 * from logical walsenders (see the comment before calling WalSndWakeup() in

 * ApplyWalRecord() for more details).

 *

 * This will be called inside critical sections, so throwing an error is not

 * advisable.

 */

void
WalSndWakeup(bool physical, bool logical)
{
    /*
     * Each kind of walsender has its own condition variable: wal_flush_cv
     * for physical ones and wal_replay_cv for logical ones.  A waiting
     * walsender has already put itself on the matching CV's waitlist in
     * WalSndWait() before it started waiting, so a broadcast reaches it.
     */

    /* Physical walsenders wait for WAL to be flushed. */
    if (physical)
        ConditionVariableBroadcast(&WalSndCtl->wal_flush_cv);

    /* Logical walsenders wait for WAL to be replayed. */
    if (logical)
        ConditionVariableBroadcast(&WalSndCtl->wal_replay_cv);
}


/*

 * Wait for readiness on the FeBe socket, or a timeout.  The mask should be

 * composed of optional WL_SOCKET_WRITEABLE and WL_SOCKET_READABLE flags.  Exit

 * on postmaster death.

 */

static void
WalSndWait(uint32 socket_events, long timeout, uint32 wait_event)
{
    WaitEvent   event;
    int         nready;
    bool        postmaster_died;

    ModifyWaitEvent(FeBeWaitSet, FeBeWaitSetSocketPos, socket_events, NULL);

    /*
     * Walsenders are woken up efficiently from WalSndWakeup() through a
     * condition variable in shared memory.
     *
     * We only *prepare* to sleep on the CV here, i.e. add ourselves to its
     * waitlist; ConditionVariableSleep() is never called.  The actual wait
     * is done with WaitEventSetWait() because socket events must be waited
     * for as well.  Processes that want to wake up walsenders (startup
     * process, walreceiver etc.) call ConditionVariableBroadcast(), which
     * in turn calls SetLatch() and so gets us out of WaitEventSetWait().
     *
     * The benefit is that wakers need not loop over all the walsender slots,
     * taking and releasing a spinlock for each one, merely to wake up the
     * walsenders that are actually waiting.
     *
     * XXX: A desirable future improvement would be to add support for CVs
     * into WaitEventSetWait().
     *
     * Physical and logical walsenders use separate CVs so that they can be
     * woken up selectively; see WalSndWakeup() for details.
     *
     * When the wait event is WAIT_FOR_STANDBY_CONFIRMATION we wait on yet
     * another CV, until physical walsenders wake us up after the
     * walreceiver has confirmed receipt of the LSN.
     */
    if (wait_event == WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION)
        ConditionVariablePrepareToSleep(&WalSndCtl->wal_confirm_rcv_cv);
    else if (MyWalSnd->kind == REPLICATION_KIND_PHYSICAL)
        ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
    else if (MyWalSnd->kind == REPLICATION_KIND_LOGICAL)
        ConditionVariablePrepareToSleep(&WalSndCtl->wal_replay_cv);

    nready = WaitEventSetWait(FeBeWaitSet, timeout, &event, 1, wait_event);

    /* "event" is only inspected when an event was actually returned. */
    postmaster_died = (nready == 1) &&
        (event.events & WL_POSTMASTER_DEATH) != 0;

    /* Leave the CV waitlist on every path, including the exit below. */
    ConditionVariableCancelSleep();

    if (postmaster_died)
        proc_exit(1);
}


/*

 * Signal all walsenders to move to stopping state.

 *

 * This will trigger walsenders to move to a state where no further WAL can be

 * generated. See this file's header for details.

 */

void
WalSndInitStopping(void)
{
    for (int slot = 0; slot < max_wal_senders; slot++)
    {
        WalSnd     *walsnd = &WalSndCtl->walsnds[slot];
        pid_t       target;

        /* Copy the pid under the spinlock; signal after releasing it. */
        SpinLockAcquire(&walsnd->mutex);
        target = walsnd->pid;
        SpinLockRelease(&walsnd->mutex);

        /* Slots with a zero pid are skipped. */
        if (target != 0)
            SendProcSignal(target, PROCSIG_WALSND_INIT_STOPPING,
                           INVALID_PROC_NUMBER);
    }
}


/*

 * Wait until all the WAL senders have quit or reached the stopping state. This

 * is used by the checkpointer to control when the shutdown checkpoint can

 * safely be performed.

 */

void
WalSndWaitStopping(void)
{
    for (;;)
    {
        bool        all_stopped = true;

        /* Scan the slots, giving up at the first one still running. */
        for (int slot = 0; slot < max_wal_senders && all_stopped; slot++)
        {
            WalSnd     *walsnd = &WalSndCtl->walsnds[slot];

            SpinLockAcquire(&walsnd->mutex);

            /* A slot with a zero pid never holds us up. */
            if (walsnd->pid != 0 && walsnd->state != WALSNDSTATE_STOPPING)
                all_stopped = false;

            SpinLockRelease(&walsnd->mutex);
        }

        /* safe to leave once every WAL sender has quit or is stopping */
        if (all_stopped)
            return;

        pg_usleep(10000L);      /* wait for 10 msec */
    }
}


/* Set state for current walsender (only called in walsender) */

void
WalSndSetState(WalSndState state)
{
    WalSnd     *walsnd = MyWalSnd;

    Assert(am_walsender);

    /* Only take the spinlock when the state actually changes. */
    if (walsnd->state != state)
    {
        SpinLockAcquire(&walsnd->mutex);
        walsnd->state = state;
        SpinLockRelease(&walsnd->mutex);
    }
}


/*

 * Return a string constant representing the state. This is used

 * in system views, and should *not* be translated.

 */

static const char *
WalSndGetStateString(WalSndState state)
{
    /* Returned for any value not matched by a case below. */
    const char *label = "UNKNOWN";

    switch (state)
    {
        case WALSNDSTATE_STARTUP:
            label = "startup";
            break;
        case WALSNDSTATE_BACKUP:
            label = "backup";
            break;
        case WALSNDSTATE_CATCHUP:
            label = "catchup";
            break;
        case WALSNDSTATE_STREAMING:
            label = "streaming";
            break;
        case WALSNDSTATE_STOPPING:
            label = "stopping";
            break;
    }

    return label;
}


/*
 * Build a palloc'd Interval whose time field is 'offset' and whose month
 * and day fields are zero.
 */
static Interval *
offset_to_interval(TimeOffset offset)
{
    Interval   *span = (Interval *) palloc(sizeof(Interval));

    span->time = offset;
    span->day = 0;
    span->month = 0;

    return span;
}


/*

 * Returns activity of walsenders, including pids and xlog locations sent to

 * standby servers.

 */

Datum
pg_stat_get_wal_senders(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_WAL_SENDERS_COLS    12
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    SyncRepStandbyData *sync_standbys;
    int         num_standbys;
    int         slot;

    InitMaterializedSRF(fcinfo, 0);

    /*
     * Fetch the current synchronous-standby candidates.  The list may be
     * stale by the time we finish, but we use it regardless.
     */
    num_standbys = SyncRepGetCandidateStandbys(&sync_standbys);

    for (slot = 0; slot < max_wal_senders; slot++)
    {
        WalSnd     *walsnd = &WalSndCtl->walsnds[slot];
        XLogRecPtr  sent_ptr;
        XLogRecPtr  write_ptr;
        XLogRecPtr  flush_ptr;
        XLogRecPtr  apply_ptr;
        TimeOffset  writeLag;
        TimeOffset  flushLag;
        TimeOffset  applyLag;
        int         priority;
        int         pid;
        WalSndState state;
        TimestampTz replyTime;
        bool        is_sync_standby;
        Datum       values[PG_STAT_GET_WAL_SENDERS_COLS];
        bool        nulls[PG_STAT_GET_WAL_SENDERS_COLS] = {0};
        int         k;

        /* Take a snapshot of this slot under its spinlock */
        SpinLockAcquire(&walsnd->mutex);
        pid = walsnd->pid;
        if (pid == 0)
        {
            /* slots with a zero pid produce no row */
            SpinLockRelease(&walsnd->mutex);
            continue;
        }
        sent_ptr = walsnd->sentPtr;
        state = walsnd->state;
        write_ptr = walsnd->write;
        flush_ptr = walsnd->flush;
        apply_ptr = walsnd->apply;
        writeLag = walsnd->writeLag;
        flushLag = walsnd->flushLag;
        applyLag = walsnd->applyLag;
        priority = walsnd->sync_standby_priority;
        replyTime = walsnd->replyTime;
        SpinLockRelease(&walsnd->mutex);

        /*
         * Was this walsender among the synchronous candidates?  Matching on
         * the PID as well as the slot index gives some protection against
         * stale candidate data.
         */
        is_sync_standby = false;
        for (k = 0; k < num_standbys && !is_sync_standby; k++)
            is_sync_standby = (sync_standbys[k].walsnd_index == slot &&
                               sync_standbys[k].pid == pid);

        values[0] = Int32GetDatum(pid);

        if (has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS))
        {
            const char *sync_state;

            values[1] = CStringGetTextDatum(WalSndGetStateString(state));

            /* An invalid LSN is shown as NULL */
            nulls[2] = XLogRecPtrIsInvalid(sent_ptr);
            values[2] = LSNGetDatum(sent_ptr);

            nulls[3] = XLogRecPtrIsInvalid(write_ptr);
            values[3] = LSNGetDatum(write_ptr);

            nulls[4] = XLogRecPtrIsInvalid(flush_ptr);
            values[4] = LSNGetDatum(flush_ptr);

            nulls[5] = XLogRecPtrIsInvalid(apply_ptr);
            values[5] = LSNGetDatum(apply_ptr);

            /*
             * A standby that always reports an invalid flush location, such
             * as a pg_basebackup background process, is treated as an
             * asynchronous standby.
             */
            if (XLogRecPtrIsInvalid(flush_ptr))
                priority = 0;

            /* A negative lag is shown as NULL */
            if (writeLag >= 0)
                values[6] = IntervalPGetDatum(offset_to_interval(writeLag));
            else
                nulls[6] = true;

            if (flushLag >= 0)
                values[7] = IntervalPGetDatum(offset_to_interval(flushLag));
            else
                nulls[7] = true;

            if (applyLag >= 0)
                values[8] = IntervalPGetDatum(offset_to_interval(applyLag));
            else
                nulls[8] = true;

            values[9] = Int32GetDatum(priority);

            /*
             * Human-readable sync state; purely informational.
             *
             * Under quorum-based sync replication the role of each standby
             * named in synchronous_standby_names can change very often: a
             * standby counted as "sync" at one moment may be a "potential"
             * one at the next.  Reporting either would be of little use, so
             * such standbys are simply reported as "quorum".
             */
            if (priority == 0)
                sync_state = "async";
            else if (!is_sync_standby)
                sync_state = "potential";
            else if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
                sync_state = "sync";
            else
                sync_state = "quorum";
            values[10] = CStringGetTextDatum(sync_state);

            /* A zero reply time is shown as NULL */
            if (replyTime != 0)
                values[11] = TimestampTzGetDatum(replyTime);
            else
                nulls[11] = true;
        }
        else
        {
            /*
             * Details are visible only to superusers and roles with the
             * privileges of pg_read_all_stats.  Everyone else just gets the
             * pid, which shows that a walsender exists, with every other
             * column NULL.
             */
            MemSet(&nulls[1], true, PG_STAT_GET_WAL_SENDERS_COLS - 1);
        }

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                             values, nulls);
    }

    return (Datum) 0;
}


/*

 * Send a keepalive message to standby.

 *

 * If requestReply is set, the message requests the other party to send

 * a message back to us, for heartbeat purposes.  We also set a flag to

 * let nearby code know that we're waiting for that response, to avoid

 * repeated requests.

 *

 * writePtr is the location up to which the WAL is sent. It is essentially

 * the same as sentPtr but in some cases, we need to send keep alive before

 * sentPtr is updated like when skipping empty transactions.

 */

static void
WalSndKeepalive(bool requestReply, XLogRecPtr writePtr)
{
    XLogRecPtr  reportedPtr;

    elog(DEBUG2, "sending replication keepalive");

    /* Fall back to sentPtr when the caller passed no valid position. */
    if (XLogRecPtrIsInvalid(writePtr))
        reportedPtr = sentPtr;
    else
        reportedPtr = writePtr;

    /* Build the message: type byte, position, current time, reply flag */
    resetStringInfo(&output_message);
    pq_sendbyte(&output_message, 'k');
    pq_sendint64(&output_message, reportedPtr);
    pq_sendint64(&output_message, GetCurrentTimestamp());
    pq_sendbyte(&output_message, requestReply ? 1 : 0);

    /* Queue it for sending, wrapped in CopyData */
    pq_putmessage_noblock('d', output_message.data, output_message.len);

    /* Remember that a reply is now outstanding */
    if (requestReply)
        waiting_for_ping_response = true;
}


/*

 * Send keepalive message if too much time has elapsed.

 */

static void
WalSndKeepaliveIfNecessary(void)
{
    TimestampTz ping_time;

    /*
     * Nothing to do when timeouts are globally disabled, when we're doing
     * something that does not partake in timeouts, or when a reply to an
     * earlier ping is still outstanding.
     */
    if (wal_sender_timeout <= 0 ||
        last_reply_timestamp <= 0 ||
        waiting_for_ping_response)
        return;

    /*
     * The ping is due once half of wal_sender_timeout has passed since the
     * last reply from the standby.
     */
    ping_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
                                            wal_sender_timeout / 2);
    if (last_processing < ping_time)
        return;

    /* Send a keep-alive that requests an immediate reply. */
    WalSndKeepalive(true, InvalidXLogRecPtr);

    /* Try to flush pending output to the client */
    if (pq_flush_if_writable() != 0)
        WalSndShutdown();
}


/*

 * Record the end of the WAL and the time it was flushed locally, so that

 * LagTrackerRead can compute the elapsed time (lag) when this WAL location is

 * eventually reported to have been written, flushed and applied by the

 * standby in a reply message.

 */

static void
LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time)
{
    bool        buffer_full = false;
    int         next_head;
    int         reader;

    if (!am_walsender)
        return;

    /*
     * Record a sample only when new WAL has been written, i.e. when the LSN
     * differs from the one seen last time.
     */
    if (lag_tracker->last_lsn == lsn)
        return;
    lag_tracker->last_lsn = lsn;

    /*
     * The circular buffer is full if moving the write head forward would run
     * into any read head.  The slowest reader (presumably apply) is thus
     * the one that controls when space is released.
     */
    next_head = (lag_tracker->write_head + 1) % LAG_TRACKER_BUFFER_SIZE;
    for (reader = 0; reader < NUM_SYNC_REP_WAIT_MODE; ++reader)
    {
        if (next_head == lag_tracker->read_heads[reader])
        {
            buffer_full = true;
            break;
        }
    }

    /*
     * When full, step back one slot and overwrite the most recent sample.
     * This is a simple (if somewhat uneven) way of lowering the sampling
     * rate; better adaptive compaction algorithms may exist.
     */
    if (buffer_full)
    {
        next_head = lag_tracker->write_head;
        lag_tracker->write_head = (lag_tracker->write_head > 0) ?
            lag_tracker->write_head - 1 : LAG_TRACKER_BUFFER_SIZE - 1;
    }

    /* Store the sample at the write head, then advance the head. */
    lag_tracker->buffer[lag_tracker->write_head].lsn = lsn;
    lag_tracker->buffer[lag_tracker->write_head].time = local_flush_time;
    lag_tracker->write_head = next_head;
}


/*

 * Find out how much time has elapsed between the moment WAL location 'lsn'

 * (or the highest known earlier LSN) was flushed locally and the time 'now'.

 * We have a separate read head for each of the reported LSN locations we

 * receive in replies from standby; 'head' controls which read head is

 * used.  Whenever a read head crosses an LSN which was written into the

 * lag buffer with LagTrackerWrite, we can use the associated timestamp to

 * find out the time this LSN (or an earlier one) was flushed locally, and

 * therefore compute the lag.

 *

 * Return -1 if no new sample data is available, and otherwise the elapsed

 * time in microseconds.

 */

static TimeOffset
LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
{
    /* Local flush time of the newest sample consumed; 0 means "none yet". */
    TimestampTz time = 0;

    /*
     * Read all unread samples up to this LSN or end of buffer.  Each sample
     * consumed is also remembered in last_read[head] for later
     * interpolation.
     */
    while (lag_tracker->read_heads[head] != lag_tracker->write_head &&
           lag_tracker->buffer[lag_tracker->read_heads[head]].lsn <= lsn)
    {
        time = lag_tracker->buffer[lag_tracker->read_heads[head]].time;
        lag_tracker->last_read[head] =
            lag_tracker->buffer[lag_tracker->read_heads[head]];
        lag_tracker->read_heads[head] =
            (lag_tracker->read_heads[head] + 1) % LAG_TRACKER_BUFFER_SIZE;
    }

    /*
     * If the lag tracker is empty, that means the standby has processed
     * everything we've ever sent so we should now clear 'last_read'.  If we
     * didn't do that, we'd risk using a stale and irrelevant sample for
     * interpolation at the beginning of the next burst of WAL after a period
     * of idleness.
     */
    if (lag_tracker->read_heads[head] == lag_tracker->write_head)
        lag_tracker->last_read[head].time = 0;

    if (time > now)
    {
        /* If the clock somehow went backwards, treat as not found. */
        return -1;
    }
    else if (time == 0)
    {
        /*
         * We didn't cross a time.  If there is a future sample that we
         * haven't reached yet, and we've already reached at least one sample,
         * let's interpolate the local flushed time.  This is mainly useful
         * for reporting a completely stuck apply position as having
         * increasing lag, since otherwise we'd have to wait for it to
         * eventually start moving again and cross one of our samples before
         * we can show the lag increasing.
         */
        if (lag_tracker->read_heads[head] == lag_tracker->write_head)
        {
            /* There are no future samples, so we can't interpolate. */
            return -1;
        }
        else if (lag_tracker->last_read[head].time != 0)
        {
            /* We can interpolate between last_read and the next sample. */
            double      fraction;
            WalTimeSample prev = lag_tracker->last_read[head];
            WalTimeSample next = lag_tracker->buffer[lag_tracker->read_heads[head]];

            if (lsn < prev.lsn)
            {
                /*
                 * Reported LSNs shouldn't normally go backwards, but it's
                 * possible when there is a timeline change.  Treat as not
                 * found.
                 */
                return -1;
            }

            /* The division below relies on the two LSNs being distinct. */
            Assert(prev.lsn < next.lsn);

            if (prev.time > next.time)
            {
                /* If the clock somehow went backwards, treat as not found. */
                return -1;
            }

            /* See how far we are between the previous and next samples. */
            fraction =
                (double) (lsn - prev.lsn) / (double) (next.lsn - prev.lsn);

            /* Scale the local flush time proportionally. */
            time = (TimestampTz)
                ((double) prev.time + (next.time - prev.time) * fraction);
        }
        else
        {
            /*
             * We have only a future sample, implying that we were entirely
             * caught up, and now there is a new burst of WAL and the
             * standby hasn't processed the first sample yet.  Until the
             * standby reaches the future sample the best we can do is report
             * the hypothetical lag if that sample were to be replayed now.
             */
            time = lag_tracker->buffer[lag_tracker->read_heads[head]].time;
        }
    }

    /* Return the elapsed time since local flush time in microseconds. */
    Assert(time != 0);
    return now - time;
}

has_privs_of_role
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5268

acl.h

pgaio_error_cleanup
void pgaio_error_cleanup(void)
Definition: aio.c:1145

aio_subsys.h

AttrNumber
int16 AttrNumber
Definition: attnum.h:21

readTimeLineHistory
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76

tliOfPointInHistory
TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history)
Definition: timeline.c:544

tliSwitchPoint
XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
Definition: timeline.c:572

TimestampDifferenceMilliseconds
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1757

TimestampDifferenceExceeds
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781

GetCurrentTimestamp
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645

timestamptz_to_str
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1862

now
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609

pgstat_report_activity
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: backend_status.c:572

STATE_RUNNING
@ STATE_RUNNING
Definition: backend_status.h:29

SendBaseBackup
void SendBaseBackup(BaseBackupCmd *cmd, IncrementalBackupInfo *ib)
Definition: basebackup.c:990

basebackup.h

AppendIncrementalManifestData
void AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data, int len)
Definition: basebackup_incremental.c:194

CreateIncrementalBackupInfo
IncrementalBackupInfo * CreateIncrementalBackupInfo(MemoryContext mcxt)
Definition: basebackup_incremental.c:152

FinalizeIncrementalManifest
void FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
Definition: basebackup_incremental.c:227

basebackup_incremental.h

next
static int32 next
Definition: blutils.c:224

values
static Datum values[MAXATTR]
Definition: bootstrap.c:151

builtins.h

CStringGetTextDatum
#define CStringGetTextDatum(s)
Definition: builtins.h:97

NameStr
#define NameStr(name)
Definition: c.h:717

pg_noreturn
#define pg_noreturn
Definition: c.h:165

SIGNAL_ARGS
#define SIGNAL_ARGS
Definition: c.h:1320

int64
int64_t int64
Definition: c.h:499

PG_BINARY
#define PG_BINARY
Definition: c.h:1244

UINT64_FORMAT
#define UINT64_FORMAT
Definition: c.h:521

uint32
uint32_t uint32
Definition: c.h:502

MemSet
#define MemSet(start, val, len)
Definition: c.h:991

TransactionId
uint32 TransactionId
Definition: c.h:623

OidIsValid
#define OidIsValid(objectId)
Definition: c.h:746

Size
size_t Size
Definition: c.h:576

SetQueryCompletion
static void SetQueryCompletion(QueryCompletion *qc, CommandTag commandTag, uint64 nprocessed)
Definition: cmdtag.h:37

ConditionVariableCancelSleep
bool ConditionVariableCancelSleep(void)
Definition: condition_variable.c:230

ConditionVariableBroadcast
void ConditionVariableBroadcast(ConditionVariable *cv)
Definition: condition_variable.c:282

ConditionVariablePrepareToSleep
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
Definition: condition_variable.c:56

ConditionVariableInit
void ConditionVariableInit(ConditionVariable *cv)
Definition: condition_variable.c:35

condition_variable.h

yyscan_t
void * yyscan_t
Definition: cubedata.h:67

TimestampTz
int64 TimestampTz
Definition: timestamp.h:39

TimeOffset
int64 TimeOffset
Definition: timestamp.h:40

get_database_name
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3188

dbcommands.h

LogicalDecodingProcessRecord
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
Definition: decode.c:88

decode.h

defGetString
char * defGetString(DefElem *def)
Definition: define.c:35

defGetBoolean
bool defGetBoolean(DefElem *def)
Definition: define.c:94

defrem.h

EndCommand
void EndCommand(const QueryCompletion *qc, CommandDest dest, bool force_undecorated_output)
Definition: dest.c:169

CreateDestReceiver
DestReceiver * CreateDestReceiver(CommandDest dest)
Definition: dest.c:113

EndReplicationCommand
void EndReplicationCommand(const char *commandTag)
Definition: dest.c:205

dest.h

DestRemote
@ DestRemote
Definition: dest.h:89

DestRemoteSimple
@ DestRemoteSimple
Definition: dest.h:91

DestNone
@ DestNone
Definition: dest.h:87

cur
struct cursor * cur
Definition: ecpg.c:29

errmsg_internal
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158

errcode_for_file_access
int errcode_for_file_access(void)
Definition: elog.c:877

errdetail
int errdetail(const char *fmt,...)
Definition: elog.c:1204

message_level_is_interesting
bool message_level_is_interesting(int elevel)
Definition: elog.c:273

errcode
int errcode(int sqlerrcode)
Definition: elog.c:854

errmsg
int errmsg(const char *fmt,...)
Definition: elog.c:1071

LOG
#define LOG
Definition: elog.h:31

COMMERROR
#define COMMERROR
Definition: elog.h:33

FATAL
#define FATAL
Definition: elog.h:41

WARNING
#define WARNING
Definition: elog.h:36

DEBUG2
#define DEBUG2
Definition: elog.h:29

DEBUG1
#define DEBUG1
Definition: elog.h:30

ERROR
#define ERROR
Definition: elog.h:39

elog
#define elog(elevel,...)
Definition: elog.h:225

ereport
#define ereport(elevel,...)
Definition: elog.h:149

do_tup_output
void do_tup_output(TupOutputState *tstate, const Datum *values, const bool *isnull)
Definition: execTuples.c:2464

TTSOpsVirtual
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84

end_tup_output
void end_tup_output(TupOutputState *tstate)
Definition: execTuples.c:2522

begin_tup_output_tupdesc
TupOutputState * begin_tup_output_tupdesc(DestReceiver *dest, TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:2444

CloseTransientFile
int CloseTransientFile(int fd)
Definition: fd.c:2871

BasicOpenFile
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1089

OpenTransientFile
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694

fd.h

Int64GetDatum
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807

PG_FUNCTION_ARGS
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193

InitMaterializedSRF
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76

funcapi.h

MyProcPid
int MyProcPid
Definition: globals.c:47

MyLatch
struct Latch * MyLatch
Definition: globals.c:63

MyDatabaseId
Oid MyDatabaseId
Definition: globals.c:94

ProcessConfigFile
void ProcessConfigFile(GucContext context)
Definition: guc-file.l:120

guc.h

PGC_SIGHUP
@ PGC_SIGHUP
Definition: guc.h:75

GetPGVariable
void GetPGVariable(const char *name, DestReceiver *dest)
Definition: guc_funcs.c:382

application_name
char * application_name
Definition: guc_tables.c:560

Assert
Assert(PointerIsAligned(start, uint64))

dlist_init
static void dlist_init(dlist_head *head)
Definition: ilist.h:314

write
#define write(a, b, c)
Definition: win32.h:14

read
#define read(a, b, c)
Definition: win32.h:13

ConfigReloadPending
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27

SignalHandlerForConfigReload
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:61

interrupt.h

on_shmem_exit
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365

proc_exit
void proc_exit(int code)
Definition: ipc.c:104

ipc.h

j
int j
Definition: isn.c:78

i
int i
Definition: isn.c:77

SetLatch
void SetLatch(Latch *latch)
Definition: latch.c:288

ResetLatch
void ResetLatch(Latch *latch)
Definition: latch.c:372

libpq.h

pq_flush
#define pq_flush()
Definition: libpq.h:46

PQ_SMALL_MESSAGE_LIMIT
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30

pq_flush_if_writable
#define pq_flush_if_writable()
Definition: libpq.h:47

pq_is_send_pending
#define pq_is_send_pending()
Definition: libpq.h:48

PQ_LARGE_MESSAGE_LIMIT
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31

pq_putmessage_noblock
#define pq_putmessage_noblock(msgtype, s, len)
Definition: libpq.h:51

FeBeWaitSetSocketPos
#define FeBeWaitSetSocketPos
Definition: libpq.h:63

list_free_deep
void list_free_deep(List *list)
Definition: list.c:1560

LogicalConfirmReceivedLocation
void LogicalConfirmReceivedLocation(XLogRecPtr lsn)
Definition: logical.c:1818

FreeDecodingContext
void FreeDecodingContext(LogicalDecodingContext *ctx)
Definition: logical.c:675

CreateDecodingContext
LogicalDecodingContext * CreateDecodingContext(XLogRecPtr start_lsn, List *output_plugin_options, bool fast_forward, XLogReaderRoutine *xl_routine, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, LogicalOutputPluginWriterUpdateProgress update_progress)
Definition: logical.c:496

DecodingContextFindStartpoint
void DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
Definition: logical.c:631

CreateInitDecodingContext
LogicalDecodingContext * CreateInitDecodingContext(const char *plugin, List *output_plugin_options, bool need_full_snapshot, XLogRecPtr restart_lsn, XLogReaderRoutine *xl_routine, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, LogicalOutputPluginWriterUpdateProgress update_progress)
Definition: logical.c:330

CheckLogicalDecodingRequirements
void CheckLogicalDecodingRequirements(void)
Definition: logical.c:109

logical.h

LWLockAcquire
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1180

LWLockRelease
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1900

LWLockReleaseAll
void LWLockReleaseAll(void)
Definition: lwlock.c:1951

LW_SHARED
@ LW_SHARED
Definition: lwlock.h:115

LW_EXCLUSIVE
@ LW_EXCLUSIVE
Definition: lwlock.h:114

MemoryContextStrdup
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1690

MemoryContextReset
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383

MemoryContextAllocZero
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1219

pstrdup
char * pstrdup(const char *in)
Definition: mcxt.c:1703

MemoryContextSetParent
void MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
Definition: mcxt.c:637

pfree
void pfree(void *pointer)
Definition: mcxt.c:1528

TopMemoryContext
MemoryContext TopMemoryContext
Definition: mcxt.c:149

palloc
void * palloc(Size size)
Definition: mcxt.c:1321

CurrentMemoryContext
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143

CacheMemoryContext
MemoryContext CacheMemoryContext
Definition: mcxt.c:152

MemoryContextDelete
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454

memutils.h

AllocSetContextCreate
#define AllocSetContextCreate
Definition: memutils.h:129

ALLOCSET_DEFAULT_SIZES
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

miscadmin.h

HOLD_CANCEL_INTERRUPTS
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141

RESUME_CANCEL_INTERRUPTS
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143

CHECK_FOR_INTERRUPTS
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122

GetUserId
Oid GetUserId(void)
Definition: miscinit.c:520

generate_unaccent_rules.dest
dest
Definition: generate_unaccent_rules.py:285

generate_unaccent_rules.action
action
Definition: generate_unaccent_rules.py:287

CMD_SELECT
@ CMD_SELECT
Definition: nodes.h:271

MemoryContextSwitchTo
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124

pg_authid.h

arg
void * arg
Definition: pg_backup_utils.c:29

ERRCODE_DATA_CORRUPTED
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41

MAXPGPATH
#define MAXPGPATH
Definition: pg_config_manual.h:100

len
const void size_t len
Definition: pg_crc32c_sse42.c:28

lfirst
#define lfirst(lc)
Definition: pg_list.h:172

NIL
#define NIL
Definition: pg_list.h:68

foreach_ptr
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469

pg_lsn.h

LSNGetDatum
static Datum LSNGetDatum(XLogRecPtr X)
Definition: pg_lsn.h:28

two_phase
static bool two_phase
Definition: pg_recvlogical.c:44

failover
static bool failover
Definition: pg_recvlogical.c:45

die
#define die(msg)
Definition: pg_test_fsync.c:100

buf
static char * buf
Definition: pg_test_fsync.c:72

pg_type.h

pgstat.h

pgstat_flush_backend
bool pgstat_flush_backend(bool nowait, bits32 flags)
Definition: pgstat_backend.c:267

pgstat_internal.h

PGSTAT_BACKEND_FLUSH_IO
#define PGSTAT_BACKEND_FLUSH_IO
Definition: pgstat_internal.h:624

pgstat_flush_io
void pgstat_flush_io(bool nowait)
Definition: pgstat_io.c:183

SendPostmasterSignal
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:165

MarkPostmasterChildWalSender
void MarkPostmasterChildWalSender(void)
Definition: pmsignal.c:309

pmsignal.h

PMSIGNAL_ADVANCE_STATE_MACHINE
@ PMSIGNAL_ADVANCE_STATE_MACHINE
Definition: pmsignal.h:43

pqsignal
#define pqsignal
Definition: port.h:531

snprintf
#define snprintf
Definition: port.h:239

StatementCancelHandler
void StatementCancelHandler(SIGNAL_ARGS)
Definition: postgres.c:3056

whereToSendOutput
CommandDest whereToSendOutput
Definition: postgres.c:91

debug_query_string
const char * debug_query_string
Definition: postgres.c:88

postgres.h

Datum
uintptr_t Datum
Definition: postgres.h:69

Int32GetDatum
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217

InvalidOid
#define InvalidOid
Definition: postgres_ext.h:35

pq_getbyte_if_available
int pq_getbyte_if_available(unsigned char *c)
Definition: pqcomm.c:1004

pq_getmessage
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1204

FeBeWaitSet
WaitEventSet * FeBeWaitSet
Definition: pqcomm.c:166

pq_endmsgread
void pq_endmsgread(void)
Definition: pqcomm.c:1166

pq_getbyte
int pq_getbyte(void)
Definition: pqcomm.c:964

pq_startmsgread
void pq_startmsgread(void)
Definition: pqcomm.c:1142

pq_getmsgint
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:415

pq_sendbytes
void pq_sendbytes(StringInfo buf, const void *data, int datalen)
Definition: pqformat.c:126

pq_getmsgstring
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:579

pq_endmessage
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:296

pq_getmsgbyte
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:399

pq_beginmessage
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88

pq_getmsgint64
int64 pq_getmsgint64(StringInfo msg)
Definition: pqformat.c:453

pq_endmessage_reuse
void pq_endmessage_reuse(StringInfo buf)
Definition: pqformat.c:314

pqformat.h

pq_sendint32
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:144

pq_sendbyte
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160

pq_sendint64
static void pq_sendint64(StringInfo buf, uint64 i)
Definition: pqformat.h:152

pq_sendint16
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:136

fd
static int fd(const char *x, int i)
Definition: preproc-init.c:105

proc.h

PROC_AFFECTS_ALL_HORIZONS
#define PROC_AFFECTS_ALL_HORIZONS
Definition: proc.h:62

INVALID_PROC_NUMBER
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26

SendProcSignal
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:284

procsignal_sigusr1_handler
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:674

PROCSIG_WALSND_INIT_STOPPING
@ PROCSIG_WALSND_INIT_STOPPING
Definition: procsignal.h:35

PqMsg_CopyDone
#define PqMsg_CopyDone
Definition: protocol.h:64

PqMsg_CopyData
#define PqMsg_CopyData
Definition: protocol.h:65

PqMsg_CopyInResponse
#define PqMsg_CopyInResponse
Definition: protocol.h:45

PqMsg_CopyBothResponse
#define PqMsg_CopyBothResponse
Definition: protocol.h:54

PqMsg_DataRow
#define PqMsg_DataRow
Definition: protocol.h:43

PqMsg_Terminate
#define PqMsg_Terminate
Definition: protocol.h:28

update_process_title
bool update_process_title
Definition: ps_status.c:31

ps_status.h

set_ps_display
static void set_ps_display(const char *activity)
Definition: ps_status.h:40

replication_scanner_is_replication_command
bool replication_scanner_is_replication_command(yyscan_t yyscanner)
Definition: repl_scanner.l:299

replication_scanner_finish
void replication_scanner_finish(yyscan_t yyscanner)
Definition: repl_scanner.l:284

replication_scanner_init
void replication_scanner_init(const char *str, yyscan_t *yyscannerp)
Definition: repl_scanner.l:268

replnodes.h

REPLICATION_KIND_PHYSICAL
@ REPLICATION_KIND_PHYSICAL
Definition: replnodes.h:22

REPLICATION_KIND_LOGICAL
@ REPLICATION_KIND_LOGICAL
Definition: replnodes.h:23

ReleaseAuxProcessResources
void ReleaseAuxProcessResources(bool isCommit)
Definition: resowner.c:1019

CurrentResourceOwner
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173

CreateAuxProcessResourceOwner
void CreateAuxProcessResourceOwner(void)
Definition: resowner.c:999

AuxProcessResourceOwner
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:176

add_size
Size add_size(Size s1, Size s2)
Definition: shmem.c:493

mul_size
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

ShmemInitStruct
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387

pg_usleep
void pg_usleep(long microsec)
Definition: signal.c:53

ReplicationSlotAcquire
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition: slot.c:559

ReplicationSlotCreate
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:324

ReplicationSlotMarkDirty
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1061

ReplicationSlotReserveWal
void ReplicationSlotReserveWal(void)
Definition: slot.c:1452

ReplicationSlotsComputeRequiredXmin
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1100

ReplicationSlotPersist
void ReplicationSlotPersist(void)
Definition: slot.c:1078

MyReplicationSlot
ReplicationSlot * MyReplicationSlot
Definition: slot.c:147

ReplicationSlotDrop
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:814

SlotExistsInSyncStandbySlots
bool SlotExistsInSyncStandbySlots(const char *slot_name)
Definition: slot.c:2772

ReplicationSlotSave
void ReplicationSlotSave(void)
Definition: slot.c:1043

SearchNamedReplicationSlot
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:479

ReplicationSlotAlter
void ReplicationSlotAlter(const char *name, const bool *failover, const bool *two_phase)
Definition: slot.c:837

ReplicationSlotRelease
void ReplicationSlotRelease(void)
Definition: slot.c:686

StandbySlotsHaveCaughtup
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2805

ReplicationSlotsComputeRequiredLSN
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1156

ReplicationSlotCleanup
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:775

slot.h

RS_PERSISTENT
@ RS_PERSISTENT
Definition: slot.h:38

RS_EPHEMERAL
@ RS_EPHEMERAL
Definition: slot.h:39

RS_TEMPORARY
@ RS_TEMPORARY
Definition: slot.h:40

SlotIsPhysical
#define SlotIsPhysical(slot)
Definition: slot.h:220

SlotIsLogical
#define SlotIsLogical(slot)
Definition: slot.h:221

IsSyncingReplicationSlots
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1668

slotsync.h

SnapBuildInitialSnapshot
Snapshot SnapBuildInitialSnapshot(SnapBuild *builder)
Definition: snapbuild.c:440

SnapBuildExportSnapshot
const char * SnapBuildExportSnapshot(SnapBuild *builder)
Definition: snapbuild.c:539

SnapBuildClearExportedSnapshot
void SnapBuildClearExportedSnapshot(void)
Definition: snapbuild.c:600

snapbuild.h

FirstSnapshotSet
bool FirstSnapshotSet
Definition: snapmgr.c:192

RestoreTransactionSnapshot
void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
Definition: snapmgr.c:1843

SpinLockInit
#define SpinLockInit(lock)
Definition: spin.h:57

SpinLockRelease
#define SpinLockRelease(lock)
Definition: spin.h:61

SpinLockAcquire
#define SpinLockAcquire(lock)
Definition: spin.h:59

MyProc
PGPROC * MyProc
Definition: proc.c:66

ProcGlobal
PROC_HDR * ProcGlobal
Definition: proc.c:78

dbname
char * dbname
Definition: streamutil.c:49

resetStringInfo
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:126

enlargeStringInfo
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:337

initStringInfo
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97

AlterReplicationSlotCmd
Definition: replnodes.h:80

AlterReplicationSlotCmd::options
List * options
Definition: replnodes.h:83

AlterReplicationSlotCmd::slotname
char * slotname
Definition: replnodes.h:82

BaseBackupCmd
Definition: replnodes.h:42

CreateReplicationSlotCmd
Definition: replnodes.h:53

CreateReplicationSlotCmd::kind
ReplicationKind kind
Definition: replnodes.h:56

CreateReplicationSlotCmd::options
List * options
Definition: replnodes.h:59

CreateReplicationSlotCmd::slotname
char * slotname
Definition: replnodes.h:55

CreateReplicationSlotCmd::temporary
bool temporary
Definition: replnodes.h:58

CreateReplicationSlotCmd::plugin
char * plugin
Definition: replnodes.h:57

DefElem
Definition: parsenodes.h:823

DefElem::defname
char * defname
Definition: parsenodes.h:826

DropReplicationSlotCmd
Definition: replnodes.h:68

DropReplicationSlotCmd::wait
bool wait
Definition: replnodes.h:71

DropReplicationSlotCmd::slotname
char * slotname
Definition: replnodes.h:70

FullTransactionId
Definition: transam.h:66

IncrementalBackupInfo
Definition: basebackup_incremental.c:75

Interval
Definition: timestamp.h:48

Interval::day
int32 day
Definition: timestamp.h:51

Interval::month
int32 month
Definition: timestamp.h:52

Interval::time
TimeOffset time
Definition: timestamp.h:49

LagTracker
Definition: walsender.c:227

LagTracker::buffer
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:229

LagTracker::read_heads
int read_heads[NUM_SYNC_REP_WAIT_MODE]
Definition: walsender.c:231

LagTracker::last_read
WalTimeSample last_read[NUM_SYNC_REP_WAIT_MODE]
Definition: walsender.c:232

LagTracker::write_head
int write_head
Definition: walsender.c:230

LagTracker::last_lsn
XLogRecPtr last_lsn
Definition: walsender.c:228

List
Definition: pg_list.h:54

LogicalDecodingContext
Definition: logical.h:34

LogicalDecodingContext::reader
XLogReaderState * reader
Definition: logical.h:42

LogicalDecodingContext::snapshot_builder
struct SnapBuild * snapshot_builder
Definition: logical.h:44

LogicalDecodingContext::out
StringInfo out
Definition: logical.h:71

LogicalDecodingContext::end_xact
bool end_xact
Definition: logical.h:111

MemoryContextData
Definition: memnodes.h:118

Node
Definition: nodes.h:135

Node::type
NodeTag type
Definition: nodes.h:136

PGPROC::xmin
TransactionId xmin
Definition: proc.h:186

PGPROC::statusFlags
uint8 statusFlags
Definition: proc.h:251

PGPROC::pgxactoff
int pgxactoff
Definition: proc.h:193

PROC_HDR::statusFlags
uint8 * statusFlags
Definition: proc.h:395

QueryCompletion
Definition: cmdtag.h:30

ReadReplicationSlotCmd
Definition: replnodes.h:107

ReadReplicationSlotCmd::type
NodeTag type
Definition: replnodes.h:108

ReadReplicationSlotCmd::slotname
char * slotname
Definition: replnodes.h:109

ReplicationSlotPersistentData::database
Oid database
Definition: slot.h:76

ReplicationSlotPersistentData::xmin
TransactionId xmin
Definition: slot.h:89

ReplicationSlotPersistentData::catalog_xmin
TransactionId catalog_xmin
Definition: slot.h:97

ReplicationSlotPersistentData::restart_lsn
XLogRecPtr restart_lsn
Definition: slot.h:100

ReplicationSlotPersistentData::confirmed_flush
XLogRecPtr confirmed_flush
Definition: slot.h:111

ReplicationSlotPersistentData::failover
bool failover
Definition: slot.h:136

ReplicationSlotPersistentData::name
NameData name
Definition: slot.h:73

ReplicationSlot
Definition: slot.h:156

ReplicationSlot::effective_catalog_xmin
TransactionId effective_catalog_xmin
Definition: slot.h:182

ReplicationSlot::mutex
slock_t mutex
Definition: slot.h:158

ReplicationSlot::in_use
bool in_use
Definition: slot.h:161

ReplicationSlot::effective_xmin
TransactionId effective_xmin
Definition: slot.h:181

ReplicationSlot::data
ReplicationSlotPersistentData data
Definition: slot.h:185

ReturnSetInfo
Definition: execnodes.h:347

ReturnSetInfo::setDesc
TupleDesc setDesc
Definition: execnodes.h:358

ReturnSetInfo::setResult
Tuplestorestate * setResult
Definition: execnodes.h:357

SnapshotData
Definition: snapshot.h:139

StartReplicationCmd
Definition: replnodes.h:92

StartReplicationCmd::startpoint
XLogRecPtr startpoint
Definition: replnodes.h:97

StartReplicationCmd::options
List * options
Definition: replnodes.h:98

StartReplicationCmd::slotname
char * slotname
Definition: replnodes.h:95

StartReplicationCmd::kind
ReplicationKind kind
Definition: replnodes.h:94

StartReplicationCmd::timeline
TimeLineID timeline
Definition: replnodes.h:96

StringInfoData
Definition: stringinfo.h:47

StringInfoData::data
char * data
Definition: stringinfo.h:48

StringInfoData::len
int len
Definition: stringinfo.h:49

SyncRepConfigData::syncrep_method
uint8 syncrep_method
Definition: syncrep.h:68

SyncRepStandbyData
Definition: syncrep.h:43

TimeLineHistoryCmd
Definition: replnodes.h:118

TimeLineHistoryCmd::timeline
TimeLineID timeline
Definition: replnodes.h:120

TupOutputState
Definition: executor.h:605

TupleDescData
Definition: tupdesc.h:136

VariableShowStmt
Definition: parsenodes.h:2720

VariableShowStmt::name
char * name
Definition: parsenodes.h:2722

WALOpenSegment::ws_tli
TimeLineID ws_tli
Definition: xlogreader.h:49

WALOpenSegment::ws_file
int ws_file
Definition: xlogreader.h:47

WALReadError
Definition: xlogreader.h:383

WALSegmentContext::ws_segsize
int ws_segsize
Definition: xlogreader.h:56

WaitEvent
Definition: waiteventset.h:60

WaitEvent::events
uint32 events
Definition: waiteventset.h:62

WalSndCtlData
Definition: walsender_private.h:85

WalSndCtlData::wal_confirm_rcv_cv
ConditionVariable wal_confirm_rcv_cv
Definition: walsender_private.h:114

WalSndCtlData::walsnds
WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]
Definition: walsender_private.h:116

WalSndCtlData::wal_replay_cv
ConditionVariable wal_replay_cv
Definition: walsender_private.h:107

WalSndCtlData::SyncRepQueue
dlist_head SyncRepQueue[NUM_SYNC_REP_WAIT_MODE]
Definition: walsender_private.h:90

WalSndCtlData::wal_flush_cv
ConditionVariable wal_flush_cv
Definition: walsender_private.h:106

WalSnd
Definition: walsender_private.h:42

WalSnd::writeLag
TimeOffset writeLag
Definition: walsender_private.h:60

WalSnd::mutex
slock_t mutex
Definition: walsender_private.h:71

WalSnd::flush
XLogRecPtr flush
Definition: walsender_private.h:56

WalSnd::sentPtr
XLogRecPtr sentPtr
Definition: walsender_private.h:46

WalSnd::pid
pid_t pid
Definition: walsender_private.h:43

WalSnd::flushLag
TimeOffset flushLag
Definition: walsender_private.h:61

WalSnd::state
WalSndState state
Definition: walsender_private.h:45

WalSnd::kind
ReplicationKind kind
Definition: walsender_private.h:78

WalSnd::write
XLogRecPtr write
Definition: walsender_private.h:55

WalSnd::applyLag
TimeOffset applyLag
Definition: walsender_private.h:62

WalSnd::sync_standby_priority
int sync_standby_priority
Definition: walsender_private.h:68

WalSnd::needreload
bool needreload
Definition: walsender_private.h:47

WalSnd::replyTime
TimestampTz replyTime
Definition: walsender_private.h:76

WalSnd::apply
XLogRecPtr apply
Definition: walsender_private.h:57

WalTimeSample
Definition: walsender.c:217

WalTimeSample::time
TimestampTz time
Definition: walsender.c:219

WalTimeSample::lsn
XLogRecPtr lsn
Definition: walsender.c:218

XLogReaderState
Definition: xlogreader.h:176

XLogReaderState::segcxt
WALSegmentContext segcxt
Definition: xlogreader.h:271

XLogReaderState::EndRecPtr
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207

XLogReaderState::seg
WALOpenSegment seg
Definition: xlogreader.h:272

XLogRecord
Definition: xlogrecord.h:42

_DestReceiver
Definition: dest.h:116

state
Definition: regguts.h:323

SyncRepInitConfig
void SyncRepInitConfig(void)
Definition: syncrep.c:445

SyncRepConfig
SyncRepConfigData * SyncRepConfig
Definition: syncrep.c:97

SyncRepGetCandidateStandbys
int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys)
Definition: syncrep.c:754

SyncRepReleaseWaiters
void SyncRepReleaseWaiters(void)
Definition: syncrep.c:474

syncrep.h

SYNC_REP_PRIORITY
#define SYNC_REP_PRIORITY
Definition: syncrep.h:35

NUM_SYNC_REP_WAIT_MODE
#define NUM_SYNC_REP_WAIT_MODE
Definition: syncrep.h:27

SyncRepRequested
#define SyncRepRequested()
Definition: syncrep.h:18

SYNC_REP_WAIT_WRITE
#define SYNC_REP_WAIT_WRITE
Definition: syncrep.h:23

SYNC_REP_WAIT_FLUSH
#define SYNC_REP_WAIT_FLUSH
Definition: syncrep.h:24

SYNC_REP_WAIT_APPLY
#define SYNC_REP_WAIT_APPLY
Definition: syncrep.h:25

tcopprot.h

timeline.h

InitializeTimeouts
void InitializeTimeouts(void)
Definition: timeout.c:470

timeout.h

TransactionIdPrecedes
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280

TransactionIdPrecedesOrEquals
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

transam.h

InvalidTransactionId
#define InvalidTransactionId
Definition: transam.h:31

EpochFromFullTransactionId
#define EpochFromFullTransactionId(x)
Definition: transam.h:47

XidFromFullTransactionId
#define XidFromFullTransactionId(x)
Definition: transam.h:48

TransactionIdIsNormal
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

CreateTemplateTupleDesc
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:175

TupleDescInitBuiltinEntry
void TupleDescInitBuiltinEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:911

tuplestore_putvalues
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784

ListCell
Definition: pg_list.h:46

PGAlignedBlock
Definition: c.h:1089

PGAlignedBlock::data
char data[BLCKSZ]
Definition: c.h:1090

unistd.h

timestamp.h

TimestampTzGetDatum
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52

IntervalPGetDatum
static Datum IntervalPGetDatum(const Interval *X)
Definition: timestamp.h:58

TimestampTzPlusMilliseconds
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:85

ReadNextFullTransactionId
FullTransactionId ReadNextFullTransactionId(void)
Definition: varsup.c:288

pgstat_report_wait_start
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69

pgstat_report_wait_end
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85

ModifyWaitEvent
void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
Definition: waiteventset.c:655

WaitEventSetWait
int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents, uint32 wait_event_info)
Definition: waiteventset.c:1037

WL_SOCKET_READABLE
#define WL_SOCKET_READABLE
Definition: waiteventset.h:35

WL_POSTMASTER_DEATH
#define WL_POSTMASTER_DEATH
Definition: waiteventset.h:38

WL_SOCKET_WRITEABLE
#define WL_SOCKET_WRITEABLE
Definition: waiteventset.h:36

walreceiver.h

GetWalRcvFlushRecPtr
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
Definition: walreceiverfuncs.c:332

ProcessPendingWrites
static void ProcessPendingWrites(void)
Definition: walsender.c:1595

sentPtr
static XLogRecPtr sentPtr
Definition: walsender.c:170

READ_REPLICATION_SLOT_COLS
#define READ_REPLICATION_SLOT_COLS

AlterReplicationSlot
static void AlterReplicationSlot(AlterReplicationSlotCmd *cmd)
Definition: walsender.c:1391

WalSndWait
static void WalSndWait(uint32 socket_events, long timeout, uint32 wait_event)
Definition: walsender.c:3716

WalSndLastCycleHandler
static void WalSndLastCycleHandler(SIGNAL_ARGS)
Definition: walsender.c:3611

got_SIGUSR2
static volatile sig_atomic_t got_SIGUSR2
Definition: walsender.c:202

WalSndCheckTimeOut
static void WalSndCheckTimeOut(void)
Definition: walsender.c:2756

XLogSendPhysical
static void XLogSendPhysical(void)
Definition: walsender.c:3095

ProcessRepliesIfAny
static void ProcessRepliesIfAny(void)
Definition: walsender.c:2226

waiting_for_ping_response
static bool waiting_for_ping_response
Definition: walsender.c:187

PhysicalWakeupLogicalWalSnd
void PhysicalWakeupLogicalWalSnd(void)
Definition: walsender.c:1714

SendTimeLineHistory
static void SendTimeLineHistory(TimeLineHistoryCmd *cmd)
Definition: walsender.c:563

WalSndErrorCleanup
void WalSndErrorCleanup(void)
Definition: walsender.c:330

InitWalSenderSlot
static void InitWalSenderSlot(void)
Definition: walsender.c:2925

parseCreateReplSlotOptions
static void parseCreateReplSlotOptions(CreateReplicationSlotCmd *cmd, bool *reserve_wal, CRSSnapshotAction *snapshot_action, bool *two_phase, bool *failover)
Definition: walsender.c:1100

MyWalSnd
WalSnd * MyWalSnd
Definition: walsender.c:117

ProcessStandbyHSFeedbackMessage
static void ProcessStandbyHSFeedbackMessage(void)
Definition: walsender.c:2588

ReadReplicationSlot
static void ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
Definition: walsender.c:464

tmpbuf
static StringInfoData tmpbuf
Definition: walsender.c:175

PhysicalReplicationSlotNewXmin
static void PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbackCatalogXmin)
Definition: walsender.c:2508

lag_tracker
static LagTracker * lag_tracker
Definition: walsender.c:235

PhysicalConfirmReceivedLocation
static void PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
Definition: walsender.c:2370

IdentifySystem
static void IdentifySystem(void)
Definition: walsender.c:383

WalSndSegmentOpen
static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition: walsender.c:3017

reply_message
static StringInfoData reply_message
Definition: walsender.c:174

WalSndKeepaliveIfNecessary
static void WalSndKeepaliveIfNecessary(void)
Definition: walsender.c:4086

am_walsender
bool am_walsender
Definition: walsender.c:120

WalSndSetState
void WalSndSetState(WalSndState state)
Definition: walsender.c:3838

output_message
static StringInfoData output_message
Definition: walsender.c:173

sendTimeLine
static TimeLineID sendTimeLine
Definition: walsender.c:161

HandleUploadManifestPacket
static bool HandleUploadManifestPacket(StringInfo buf, off_t *offset, IncrementalBackupInfo *ib)
Definition: walsender.c:719

WalSndLoop
static void WalSndLoop(WalSndSendDataCallback send_data)
Definition: walsender.c:2783

WalSndWriteData
static void WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write)
Definition: walsender.c:1553

WalSndWakeup
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3695

logical_decoding_ctx
static LogicalDecodingContext * logical_decoding_ctx
Definition: walsender.c:213

XLogSendLogical
static void XLogSendLogical(void)
Definition: walsender.c:3405

WalSndShmemInit
void WalSndShmemInit(void)
Definition: walsender.c:3650

am_db_walsender
bool am_db_walsender
Definition: walsender.c:123

replication_active
static volatile sig_atomic_t replication_active
Definition: walsender.c:211

UploadManifest
static void UploadManifest(void)
Definition: walsender.c:653

wake_wal_senders
bool wake_wal_senders
Definition: walsender.c:135

got_STOPPING
static volatile sig_atomic_t got_STOPPING
Definition: walsender.c:203

max_wal_senders
int max_wal_senders
Definition: walsender.c:126

TransactionIdInRecentPast
static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch)
Definition: walsender.c:2557

WalSndUpdateProgress
static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool skipped_xact)
Definition: walsender.c:1649

exec_replication_command
bool exec_replication_command(const char *cmd_string)
Definition: walsender.c:1970

WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS
#define WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS

NeedToWaitForStandbys
static bool NeedToWaitForStandbys(XLogRecPtr flushed_lsn, uint32 *wait_event)
Definition: walsender.c:1739

InitWalSender
void InitWalSender(void)
Definition: walsender.c:283

PG_STAT_GET_WAL_SENDERS_COLS
#define PG_STAT_GET_WAL_SENDERS_COLS

WalSndSendDataCallback
void(* WalSndSendDataCallback)(void)
Definition: walsender.c:241

pg_stat_get_wal_senders
Datum pg_stat_get_wal_senders(PG_FUNCTION_ARGS)
Definition: walsender.c:3892

WalSndInitStopping
void WalSndInitStopping(void)
Definition: walsender.c:3774

WalSndWaitStopping
void WalSndWaitStopping(void)
Definition: walsender.c:3800

sendTimeLineIsHistoric
static bool sendTimeLineIsHistoric
Definition: walsender.c:163

WalSndRqstFileReload
void WalSndRqstFileReload(void)
Definition: walsender.c:3566

WalSndWaitForWal
static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc)
Definition: walsender.c:1799

am_cascading_walsender
bool am_cascading_walsender
Definition: walsender.c:121

last_processing
static TimestampTz last_processing
Definition: walsender.c:178

NeedToWaitForWal
static bool NeedToWaitForWal(XLogRecPtr target_lsn, XLogRecPtr flushed_lsn, uint32 *wait_event)
Definition: walsender.c:1771

WalSndShmemSize
Size WalSndShmemSize(void)
Definition: walsender.c:3638

log_replication_commands
bool log_replication_commands
Definition: walsender.c:130

HandleWalSndInitStopping
void HandleWalSndInitStopping(void)
Definition: walsender.c:3589

sendTimeLineNextTLI
static TimeLineID sendTimeLineNextTLI
Definition: walsender.c:162

uploaded_manifest_mcxt
static MemoryContext uploaded_manifest_mcxt
Definition: walsender.c:153

CreateReplicationSlot
static void CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
Definition: walsender.c:1177

logical_read_xlog_page
static int logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition: walsender.c:1027

ProcessStandbyReplyMessage
static void ProcessStandbyReplyMessage(void)
Definition: walsender.c:2403

WalSndKeepalive
static void WalSndKeepalive(bool requestReply, XLogRecPtr writePtr)
Definition: walsender.c:4063

LagTrackerWrite
static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time)
Definition: walsender.c:4124

WALSENDER_STATS_FLUSH_INTERVAL
#define WALSENDER_STATS_FLUSH_INTERVAL
Definition: walsender.c:100

WalSndSignals
void WalSndSignals(void)
Definition: walsender.c:3619

streamingDoneSending
static bool streamingDoneSending
Definition: walsender.c:195

StartLogicalReplication
static void StartLogicalReplication(StartReplicationCmd *cmd)
Definition: walsender.c:1433

uploaded_manifest
static IncrementalBackupInfo * uploaded_manifest
Definition: walsender.c:152

WalSndShutdown
static pg_noreturn void WalSndShutdown(void)
Definition: walsender.c:366

WalSndKill
static void WalSndKill(int code, Datum arg)
Definition: walsender.c:3001

wal_sender_timeout
int wal_sender_timeout
Definition: walsender.c:128

MAX_SEND_SIZE
#define MAX_SEND_SIZE
Definition: walsender.c:111

offset_to_interval
static Interval * offset_to_interval(TimeOffset offset)
Definition: walsender.c:3876

WalSndCaughtUp
static bool WalSndCaughtUp
Definition: walsender.c:199

sendTimeLineValidUpto
static XLogRecPtr sendTimeLineValidUpto
Definition: walsender.c:164

ProcessStandbyMessage
static void ProcessStandbyMessage(void)
Definition: walsender.c:2339

WalSndPrepareWrite
static void WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write)
Definition: walsender.c:1526

DropReplicationSlot
static void DropReplicationSlot(DropReplicationSlotCmd *cmd)
Definition: walsender.c:1382

LAG_TRACKER_BUFFER_SIZE
#define LAG_TRACKER_BUFFER_SIZE
Definition: walsender.c:223

WalSndGetStateString
static const char * WalSndGetStateString(WalSndState state)
Definition: walsender.c:3857

LagTrackerRead
static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
Definition: walsender.c:4189

WalSndComputeSleeptime
static long WalSndComputeSleeptime(TimestampTz now)
Definition: walsender.c:2712

streamingDoneReceiving
static bool streamingDoneReceiving
Definition: walsender.c:196

StartReplication
static void StartReplication(StartReplicationCmd *cmd)
Definition: walsender.c:795

WalSndDone
static void WalSndDone(WalSndSendDataCallback send_data)
Definition: walsender.c:3490

xlogreader
static XLogReaderState * xlogreader
Definition: walsender.c:142

last_reply_timestamp
static TimestampTz last_reply_timestamp
Definition: walsender.c:184

GetStandbyFlushRecPtr
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition: walsender.c:3533

WalSndCtl
WalSndCtlData * WalSndCtl
Definition: walsender.c:114

walsender.h

CRSSnapshotAction
CRSSnapshotAction
Definition: walsender.h:21

CRS_USE_SNAPSHOT
@ CRS_USE_SNAPSHOT
Definition: walsender.h:24

CRS_NOEXPORT_SNAPSHOT
@ CRS_NOEXPORT_SNAPSHOT
Definition: walsender.h:23

CRS_EXPORT_SNAPSHOT
@ CRS_EXPORT_SNAPSHOT
Definition: walsender.h:22

walsender_private.h

SYNC_STANDBY_DEFINED
#define SYNC_STANDBY_DEFINED
Definition: walsender_private.h:132

WalSndState
WalSndState
Definition: walsender_private.h:25

WALSNDSTATE_STREAMING
@ WALSNDSTATE_STREAMING
Definition: walsender_private.h:29

WALSNDSTATE_BACKUP
@ WALSNDSTATE_BACKUP
Definition: walsender_private.h:27

WALSNDSTATE_CATCHUP
@ WALSNDSTATE_CATCHUP
Definition: walsender_private.h:28

WALSNDSTATE_STARTUP
@ WALSNDSTATE_STARTUP
Definition: walsender_private.h:26

WALSNDSTATE_STOPPING
@ WALSNDSTATE_STOPPING
Definition: walsender_private.h:30

replication_yyparse
int replication_yyparse(Node **replication_parse_result_p, yyscan_t yyscanner)

SIGCHLD
#define SIGCHLD
Definition: win32_port.h:168

SIGHUP
#define SIGHUP
Definition: win32_port.h:158

SIGPIPE
#define SIGPIPE
Definition: win32_port.h:163

kill
#define kill(pid, sig)
Definition: win32_port.h:493

SIGUSR1
#define SIGUSR1
Definition: win32_port.h:170

SIGUSR2
#define SIGUSR2
Definition: win32_port.h:171

epoch
static const unsigned __int64 epoch
Definition: win32gettimeofday.c:36

IsTransactionOrTransactionBlock
bool IsTransactionOrTransactionBlock(void)
Definition: xact.c:4989

XactReadOnly
bool XactReadOnly
Definition: xact.c:82

PreventInTransactionBlock
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3648

StartTransactionCommand
void StartTransactionCommand(void)
Definition: xact.c:3059

IsAbortedTransactionBlockState
bool IsAbortedTransactionBlockState(void)
Definition: xact.c:407

XactIsoLevel
int XactIsoLevel
Definition: xact.c:79

IsSubTransaction
bool IsSubTransaction(void)
Definition: xact.c:5044

IsTransactionBlock
bool IsTransactionBlock(void)
Definition: xact.c:4971

CommitTransactionCommand
void CommitTransactionCommand(void)
Definition: xact.c:3157

xact.h

XACT_REPEATABLE_READ
#define XACT_REPEATABLE_READ
Definition: xact.h:38

GetSystemIdentifier
uint64 GetSystemIdentifier(void)
Definition: xlog.c:4734

RecoveryInProgress
bool RecoveryInProgress(void)
Definition: xlog.c:6522

GetWALInsertionTimeLine
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6708

WALReadFromBuffers
Size WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
Definition: xlog.c:1759

CheckXLogRemoved
void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3866

wal_segment_size
int wal_segment_size
Definition: xlog.c:143

GetFlushRecPtr
XLogRecPtr GetFlushRecPtr(TimeLineID *insertTLI)
Definition: xlog.c:6687

XLogBackgroundFlush
bool XLogBackgroundFlush(void)
Definition: xlog.c:3111

xlog_internal.h

MAXFNAMELEN
#define MAXFNAMELEN
Definition: xlog_internal.h:156

XLByteToSeg
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
Definition: xlog_internal.h:117

XLogFilePath
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
Definition: xlog_internal.h:210

XLogFileName
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
Definition: xlog_internal.h:166

TLHistoryFilePath
static void TLHistoryFilePath(char *path, TimeLineID tli)
Definition: xlog_internal.h:232

TLHistoryFileName
static void TLHistoryFileName(char *fname, TimeLineID tli)
Definition: xlog_internal.h:218

LSN_FORMAT_ARGS
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43

XLogRecPtrIsInvalid
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

XLogRecPtr
uint64 XLogRecPtr
Definition: xlogdefs.h:21

InvalidXLogRecPtr
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

TimeLineID
uint32 TimeLineID
Definition: xlogdefs.h:59

XLogSegNo
uint64 XLogSegNo
Definition: xlogdefs.h:48

XLogReaderAllocate
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107

WALRead
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1504

XLogReadRecord
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:390

XLogBeginRead
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:232

xlogreader.h

XL_ROUTINE
#define XL_ROUTINE(...)
Definition: xlogreader.h:117

receiveTLI
static TimeLineID receiveTLI
Definition: xlogrecovery.c:264

GetXLogReplayRecPtr
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlogrecovery.c:4559

xlogrecovery.h

wal_segment_close
void wal_segment_close(XLogReaderState *state)
Definition: xlogutils.c:831

XLogReadDetermineTimeline
void XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength, TimeLineID currTLI)
Definition: xlogutils.c:707

WALReadRaiseError
void WALReadRaiseError(WALReadError *errinfo)
Definition: xlogutils.c:1011

xlogutils.h