PostgreSQL Source Code  git master
worker.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * worker.c
3  * PostgreSQL logical replication worker (apply)
4  *
5  * Copyright (c) 2016-2022, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/replication/logical/worker.c
9  *
10  * NOTES
11  * This file contains the worker which applies logical changes as they come
12  * from remote logical replication stream.
13  *
14  * The main worker (apply) is started by logical replication worker
15  * launcher for every enabled subscription in a database. It uses
16  * walsender protocol to communicate with publisher.
17  *
18  * This module includes server facing code and shares libpqwalreceiver
19  * module with walreceiver for providing the libpq specific functionality.
20  *
21  *
22  * STREAMED TRANSACTIONS
23  * ---------------------
24  * Streamed transactions (large transactions exceeding a memory limit on the
25  * upstream) are not applied immediately, but instead, the data is written
26  * to temporary files and then applied at once when the final commit arrives.
27  *
28  * Unlike the regular (non-streamed) case, handling streamed transactions has
29  * to handle aborts of both the toplevel transaction and subtransactions. This
30  * is achieved by tracking offsets for subtransactions, which is then used
31  * to truncate the file with serialized changes.
32  *
33  * The files are placed in tmp file directory by default, and the filenames
34  * include both the XID of the toplevel transaction and OID of the
35  * subscription. This is necessary so that different workers processing a
36  * remote transaction with the same XID don't interfere.
37  *
38  * We use BufFiles instead of using normal temporary files because (a) the
39  * BufFile infrastructure supports temporary files that exceed the OS file size
40  * limit, (b) provides a way for automatic clean up on the error and (c) provides
41  * a way for these files to survive across local transactions and allows them to
42  * be opened at stream start and closed at stream stop. We decided to use FileSet
43  * infrastructure as without that it deletes the files on the closure of the
44  * file and if we decide to keep stream files open across the start/stop stream
45  * then it will consume a lot of memory (more than 8K for each BufFile and
46  * there could be multiple such BufFiles as the subscriber could receive
47  * multiple start/stop streams for different transactions before getting the
48  * commit). Moreover, if we don't use FileSet then we also need to invent
49  * a new way to pass filenames to BufFile APIs so that we are allowed to open
50  * the file we desired across multiple stream-open calls for the same
51  * transaction.
52  *
53  * TWO_PHASE TRANSACTIONS
54  * ----------------------
55  * Two phase transactions are replayed at prepare and then committed or
56  * rolled back at commit prepared and rollback prepared respectively. It is
57  * possible to have a prepared transaction that arrives at the apply worker
58  * when the tablesync is busy doing the initial copy. In this case, the apply
59  * worker skips all the prepared operations [e.g. inserts] while the tablesync
60  * is still busy (see the condition of should_apply_changes_for_rel). The
61  * tablesync worker might not get such a prepared transaction because say it
62  * was prior to the initial consistent point but might have got some later
63  * commits. Now, the tablesync worker will exit without doing anything for the
64  * prepared transaction skipped by the apply worker as the sync location for it
65  * will be already ahead of the apply worker's current location. This would lead
66  * to an "empty prepare", because later when the apply worker does the commit
67  * prepare, there is nothing in it (the inserts were skipped earlier).
68  *
69  * To avoid this, and similar prepare confusions the subscription's two_phase
70  * commit is enabled only after the initial sync is over. The two_phase option
71  * has been implemented as a tri-state with values DISABLED, PENDING, and
72  * ENABLED.
73  *
74  * Even if the user specifies they want a subscription with two_phase = on,
75  * internally it will start with a tri-state of PENDING which only becomes
76  * ENABLED after all tablesync initializations are completed - i.e. when all
77  * tablesync workers have reached their READY state. In other words, the value
78  * PENDING is only a temporary state for subscription start-up.
79  *
80  * Until the two_phase is properly available (ENABLED) the subscription will
81  * behave as if two_phase = off. When the apply worker detects that all
82  * tablesyncs have become READY (while the tri-state was PENDING) it will
83  * restart the apply worker process. This happens in
84  * process_syncing_tables_for_apply.
85  *
86  * When the (re-started) apply worker finds that all tablesyncs are READY for a
87  * two_phase tri-state of PENDING it starts streaming messages with the
88  * two_phase option which in turn enables the decoding of two-phase commits at
89  * the publisher. Then, it updates the tri-state value from PENDING to ENABLED.
90  * Now, it is possible that during the time we have not enabled two_phase, the
91  * publisher (replication server) would have skipped some prepares but we
92  * ensure that such prepares are sent along with commit prepare, see
93  * ReorderBufferFinishPrepared.
94  *
95  * If the subscription has no tables then a two_phase tri-state PENDING is
96  * left unchanged. This lets the user still do an ALTER SUBSCRIPTION REFRESH
97  * PUBLICATION which might otherwise be disallowed (see below).
98  *
99  * If ever a user needs to be aware of the tri-state value, they can fetch it
100  * from the pg_subscription catalog (see column subtwophasestate).
101  *
102  * We don't allow toggling the two_phase option of a subscription because it can
103  * lead to an inconsistent replica. Consider, initially, it was on and we have
104  * received some prepare then we turn it off, now at commit time the server
105  * will send the entire transaction data along with the commit. With some more
106  * analysis, we can allow changing this option from off to on but not sure if
107  * that alone would be useful.
108  *
109  * Finally, to avoid problems mentioned in previous paragraphs from any
110  * subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on'
111  * to 'off' and then again back to 'on') there is a restriction for
112  * ALTER SUBSCRIPTION REFRESH PUBLICATION. This command is not permitted when
113  * the two_phase tri-state is ENABLED, except when copy_data = false.
114  *
115  * We can get prepare of the same GID more than once for the genuine cases
116  * where we have defined multiple subscriptions for publications on the same
117  * server and prepared transaction has operations on tables subscribed to those
118  * subscriptions. For such cases, if we use the GID sent by publisher one of
119  * the prepares will be successful and others will fail, in which case the
120  * server will send them again. Now, this can lead to a deadlock if user has
121  * set synchronous_standby_names for all the subscriptions on subscriber. To
122  * avoid such deadlocks, we generate a unique GID (consisting of the
123  * subscription oid and the xid of the prepared transaction) for each prepare
124  * transaction on the subscriber.
125  *-------------------------------------------------------------------------
126  */
127 
128 #include "postgres.h"
129 
130 #include <sys/stat.h>
131 #include <unistd.h>
132 
133 #include "access/table.h"
134 #include "access/tableam.h"
135 #include "access/twophase.h"
136 #include "access/xact.h"
137 #include "access/xlog_internal.h"
138 #include "catalog/catalog.h"
139 #include "catalog/indexing.h"
140 #include "catalog/namespace.h"
141 #include "catalog/partition.h"
142 #include "catalog/pg_inherits.h"
143 #include "catalog/pg_subscription.h"
145 #include "catalog/pg_tablespace.h"
146 #include "commands/tablecmds.h"
147 #include "commands/tablespace.h"
148 #include "commands/trigger.h"
149 #include "executor/executor.h"
150 #include "executor/execPartition.h"
152 #include "funcapi.h"
153 #include "libpq/pqformat.h"
154 #include "libpq/pqsignal.h"
155 #include "mb/pg_wchar.h"
156 #include "miscadmin.h"
157 #include "nodes/makefuncs.h"
158 #include "optimizer/optimizer.h"
159 #include "pgstat.h"
160 #include "postmaster/bgworker.h"
161 #include "postmaster/interrupt.h"
162 #include "postmaster/postmaster.h"
163 #include "postmaster/walwriter.h"
164 #include "replication/decode.h"
165 #include "replication/logical.h"
169 #include "replication/origin.h"
171 #include "replication/snapbuild.h"
172 #include "replication/walreceiver.h"
174 #include "rewrite/rewriteHandler.h"
175 #include "storage/buffile.h"
176 #include "storage/bufmgr.h"
177 #include "storage/fd.h"
178 #include "storage/ipc.h"
179 #include "storage/lmgr.h"
180 #include "storage/proc.h"
181 #include "storage/procarray.h"
182 #include "tcop/tcopprot.h"
183 #include "utils/acl.h"
184 #include "utils/builtins.h"
185 #include "utils/catcache.h"
186 #include "utils/dynahash.h"
187 #include "utils/datum.h"
188 #include "utils/fmgroids.h"
189 #include "utils/guc.h"
190 #include "utils/inval.h"
191 #include "utils/lsyscache.h"
192 #include "utils/memutils.h"
193 #include "utils/pg_lsn.h"
194 #include "utils/rel.h"
195 #include "utils/rls.h"
196 #include "utils/syscache.h"
197 #include "utils/timeout.h"
198 
199 #define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */
200 
201 typedef struct FlushPosition
202 {
207 
209 
210 typedef struct ApplyExecutionData
211 {
212  EState *estate; /* executor state, used to track resources */
213 
214  LogicalRepRelMapEntry *targetRel; /* replication target rel */
215  ResultRelInfo *targetRelInfo; /* ResultRelInfo for same */
216 
217  /* These fields are used when the target relation is partitioned: */
218  ModifyTableState *mtstate; /* dummy ModifyTable state */
219  PartitionTupleRouting *proute; /* partition routing info */
221 
222 /* Struct for saving and restoring apply errcontext information */
223 typedef struct ApplyErrorCallbackArg
224 {
225  LogicalRepMsgType command; /* 0 if invalid */
227 
228  /* Remote node information */
229  int remote_attnum; /* -1 if invalid */
232  char *origin_name;
234 
236 {
237  .command = 0,
238  .rel = NULL,
239  .remote_attnum = -1,
240  .remote_xid = InvalidTransactionId,
241  .finish_lsn = InvalidXLogRecPtr,
242  .origin_name = NULL,
243 };
244 
247 
248 /* per stream context for streaming transactions */
250 
252 
254 static bool MySubscriptionValid = false;
255 
258 
259 /* fields valid only when processing streamed transaction */
260 static bool in_streamed_transaction = false;
261 
263 
264 /*
265  * We enable skipping all data modification changes (INSERT, UPDATE, etc.) for
266  * the subscription if the remote transaction's finish LSN matches the subskiplsn.
267  * Once we start skipping changes, we don't stop it until we skip all changes of
268  * the transaction even if pg_subscription is updated and MySubscription->skiplsn
269  * gets changed or reset during that. Also, in streaming transaction cases, we
270  * don't skip receiving and spooling the changes since we decide whether or not
271  * to skip applying the changes when starting to apply changes. The subskiplsn is
272  * cleared after successfully skipping the transaction or applying non-empty
273  * transaction. The latter prevents the mistakenly specified subskiplsn from
274  * being left.
275  */
277 #define is_skipping_changes() (unlikely(!XLogRecPtrIsInvalid(skip_xact_finish_lsn)))
278 
279 /* BufFile handle of the current streaming file */
280 static BufFile *stream_fd = NULL;
281 
282 typedef struct SubXactInfo
283 {
284  TransactionId xid; /* XID of the subxact */
285  int fileno; /* file number in the buffile */
286  off_t offset; /* offset in the file */
288 
289 /* Sub-transaction data for the current streaming transaction */
290 typedef struct ApplySubXactData
291 {
292  uint32 nsubxacts; /* number of sub-transactions */
293  uint32 nsubxacts_max; /* current capacity of subxacts */
294  TransactionId subxact_last; /* xid of the last sub-transaction */
295  SubXactInfo *subxacts; /* sub-xact offset in changes file */
297 
299 
300 static inline void subxact_filename(char *path, Oid subid, TransactionId xid);
301 static inline void changes_filename(char *path, Oid subid, TransactionId xid);
302 
303 /*
304  * Information about subtransactions of a given toplevel transaction.
305  */
306 static void subxact_info_write(Oid subid, TransactionId xid);
307 static void subxact_info_read(Oid subid, TransactionId xid);
308 static void subxact_info_add(TransactionId xid);
309 static inline void cleanup_subxact_info(void);
310 
311 /*
312  * Serialize and deserialize changes for a toplevel transaction.
313  */
314 static void stream_cleanup_files(Oid subid, TransactionId xid);
315 static void stream_open_file(Oid subid, TransactionId xid,
316  bool first_segment);
317 static void stream_write_change(char action, StringInfo s);
318 static void stream_close_file(void);
319 
320 static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
321 
322 static void store_flush_position(XLogRecPtr remote_lsn);
323 
324 static void maybe_reread_subscription(void);
325 
326 static void DisableSubscriptionAndExit(void);
327 
328 /* prototype needed because of stream_commit */
329 static void apply_dispatch(StringInfo s);
330 
331 static void apply_handle_commit_internal(LogicalRepCommitData *commit_data);
333  ResultRelInfo *relinfo,
334  TupleTableSlot *remoteslot);
336  ResultRelInfo *relinfo,
337  TupleTableSlot *remoteslot,
338  LogicalRepTupleData *newtup);
340  ResultRelInfo *relinfo,
341  TupleTableSlot *remoteslot);
342 static bool FindReplTupleInLocalRel(EState *estate, Relation localrel,
343  LogicalRepRelation *remoterel,
344  TupleTableSlot *remoteslot,
345  TupleTableSlot **localslot);
347  TupleTableSlot *remoteslot,
348  LogicalRepTupleData *newtup,
349  CmdType operation);
350 
351 /* Compute GID for two_phase transactions */
352 static void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid);
353 
354 /* Common streaming function to apply all the spooled messages */
355 static void apply_spooled_messages(TransactionId xid, XLogRecPtr lsn);
356 
357 /* Functions for skipping changes */
358 static void maybe_start_skipping_changes(XLogRecPtr finish_lsn);
359 static void stop_skipping_changes(void);
360 static void clear_subscription_skip_lsn(XLogRecPtr finish_lsn);
361 
362 /* Functions for apply error callback */
363 static void apply_error_callback(void *arg);
364 static inline void set_apply_error_context_xact(TransactionId xid, XLogRecPtr lsn);
365 static inline void reset_apply_error_context_info(void);
366 
367 /*
368  * Form the origin name for the subscription.
369  *
370  * This is a common function for tablesync and other workers. Tablesync workers
371  * must pass a valid relid. Other callers must pass relid = InvalidOid.
372  *
373  * Return the name in the supplied buffer.
 *
 * The name is "pg_<suboid>_<relid>" when relid is valid and "pg_<suboid>"
 * otherwise. It is written with snprintf(), so at most szoriginname bytes
 * (including the terminating NUL) are stored in originname.
374  */
375 void
377  char *originname, Size szoriginname)
378 {
379  if (OidIsValid(relid))
380  {
381  /* Replication origin name for tablesync workers. */
382  snprintf(originname, szoriginname, "pg_%u_%u", suboid, relid);
383  }
384  else
385  {
386  /* Replication origin name for non-tablesync workers. */
387  snprintf(originname, szoriginname, "pg_%u", suboid);
388  }
389 }
390 
391 /*
392  * Should this worker apply changes for given relation.
393  *
394  * This is mainly needed for initial relation data sync as that runs in
395  * separate worker process running in parallel and we need some way to skip
396  * changes coming to the main apply worker during the sync of a table.
397  *
398  * Note we need to do smaller or equals comparison for SYNCDONE state because
399  * it might hold position of end of initial slot consistent point WAL
400  * record + 1 (ie start of next record) and next record can be COMMIT of
401  * transaction we are now processing (which is what we set remote_final_lsn
402  * to in apply_handle_begin).
 *
 * Returns true when the change should be applied. A tablesync worker accepts
 * only the relation whose rel->localreloid equals MyLogicalRepWorker->relid.
 * Any other worker accepts a relation that is in READY state, or in SYNCDONE
 * state with rel->statelsn <= remote_final_lsn.
403  */
404 static bool
406 {
407  if (am_tablesync_worker())
408  return MyLogicalRepWorker->relid == rel->localreloid;
409  else
410  return (rel->state == SUBREL_STATE_READY ||
411  (rel->state == SUBREL_STATE_SYNCDONE &&
412  rel->statelsn <= remote_final_lsn));
413 }
414 
415 /*
416  * Begin one step (one INSERT, UPDATE, etc) of a replication transaction.
417  *
418  * Start a transaction, if this is the first step (else we keep using the
419  * existing transaction).
420  * Also provide a global snapshot and ensure we run in ApplyMessageContext.
421  */
422 static void
424 {
426 
427  if (!IsTransactionState())
428  {
431  }
432 
434 
436 }
437 
438 /*
439  * Finish up one step of a replication transaction.
440  * Callers of begin_replication_step() must also call this.
441  *
442  * We don't close out the transaction here, but we should increment
443  * the command counter to make the effects of this step visible.
444  */
445 static void
447 {
449 
451 }
452 
453 /*
454  * Handle streamed transactions.
455  *
456  * If in streaming mode (receiving a block of streamed transaction), we
457  * simply redirect it to a file for the proper toplevel transaction.
458  *
459  * Returns true for streamed transactions, false otherwise (regular mode).
 *
 * Raises ERROR (ERRCODE_PROTOCOL_VIOLATION) when the XID read from the
 * message is not a valid transaction ID.
460  */
461 static bool
463 {
464  TransactionId xid;
465 
466  /* not in streaming mode */
468  return false;
469 
 /* In streaming mode the changes file must already be open. */
470  Assert(stream_fd != NULL);
472 
473  /*
474  * We should have received XID of the subxact as the first part of the
475  * message, so extract it.
 * It is read from the message as a 4-byte integer.
476  */
477  xid = pq_getmsgint(s, 4);
478 
479  if (!TransactionIdIsValid(xid))
480  ereport(ERROR,
481  (errcode(ERRCODE_PROTOCOL_VIOLATION),
482  errmsg_internal("invalid transaction ID in streamed replication transaction")));
483 
484  /* Add the new subxact to the array (unless already there). */
485  subxact_info_add(xid);
486 
487  /* write the change to the current file */
489 
490  return true;
491 }
492 
493 /*
494  * Executor state preparation for evaluation of constraint expressions,
495  * indexes and triggers for the specified relation.
496  *
497  * Note that the caller must open and close any indexes to be updated.
 *
 * Returns a zero-initialized (palloc0) ApplyExecutionData in which estate,
 * targetRel and targetRelInfo have been filled in; every other field is left
 * NULL.
498  */
499 static ApplyExecutionData *
501 {
502  ApplyExecutionData *edata;
503  EState *estate;
504  RangeTblEntry *rte;
505  ResultRelInfo *resultRelInfo;
506 
507  edata = (ApplyExecutionData *) palloc0(sizeof(ApplyExecutionData));
508  edata->targetRel = rel;
509 
510  edata->estate = estate = CreateExecutorState();
511 
 /* Build a one-entry range table that describes the local target relation. */
512  rte = makeNode(RangeTblEntry);
513  rte->rtekind = RTE_RELATION;
514  rte->relid = RelationGetRelid(rel->localrel);
515  rte->relkind = rel->localrel->rd_rel->relkind;
517  ExecInitRangeTable(estate, list_make1(rte));
518 
519  edata->targetRelInfo = resultRelInfo = makeNode(ResultRelInfo);
520 
521  /*
522  * Use Relation opened by logicalrep_rel_open() instead of opening it
523  * again.
 * The third argument (1) is the index of the range-table entry built above.
524  */
525  InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
526 
527  /*
528  * We put the ResultRelInfo in the es_opened_result_relations list, even
529  * though we don't populate the es_result_relations array. That's a bit
530  * bogus, but it's enough to make ExecGetTriggerResultRel() find them.
531  *
532  * ExecOpenIndices() is not called here either, each execution path doing
533  * an apply operation being responsible for that.
534  */
536  lappend(estate->es_opened_result_relations, resultRelInfo);
537 
538  estate->es_output_cid = GetCurrentCommandId(true);
539 
540  /* Prepare to catch AFTER triggers. */
542 
543  /* other fields of edata remain NULL for now */
544 
545  return edata;
546 }
547 
548 /*
549  * Finish any operations related to the executor state created by
550  * create_edata_for_relation().
 *
 * This fires queued AFTER triggers, shuts down tuple routing if edata->proute
 * is set, releases the executor state, and finally frees edata itself, so the
 * caller must not use the pointer afterwards.
551  */
552 static void
554 {
555  EState *estate = edata->estate;
556 
557  /* Handle any queued AFTER triggers. */
558  AfterTriggerEndQuery(estate);
559 
560  /* Shut down tuple routing, if any was done. */
561  if (edata->proute)
562  ExecCleanupTupleRouting(edata->mtstate, edata->proute);
563 
564  /*
565  * Cleanup. It might seem that we should call ExecCloseResultRelations()
566  * here, but we intentionally don't. It would close the rel we added to
567  * es_opened_result_relations above, which is wrong because we took no
568  * corresponding refcount. We rely on ExecCleanupTupleRouting() to close
569  * any other relations opened during execution.
570  */
571  ExecResetTupleTable(estate->es_tupleTable, false);
572  FreeExecutorState(estate);
573  pfree(edata);
574 }
575 
576 /*
577  * Executes default values for columns for which we can't map to remote
578  * relation columns.
579  *
580  * This allows us to support tables which have more columns on the downstream
581  * than on the upstream.
 *
 * Nothing is done when the local relation has exactly as many attributes as
 * the remote one. Otherwise, every local column that is not dropped, not
 * generated, not mapped to a remote column, and has a default expression gets
 * that expression evaluated into the slot's tts_values/tts_isnull arrays.
582  */
583 static void
585  TupleTableSlot *slot)
586 {
587  TupleDesc desc = RelationGetDescr(rel->localrel);
588  int num_phys_attrs = desc->natts;
589  int i;
590  int attnum,
591  num_defaults = 0;
592  int *defmap; /* 0-based local attribute index of each default */
593  ExprState **defexprs; /* expression state for each default, parallel to defmap */
594  ExprContext *econtext;
595 
596  econtext = GetPerTupleExprContext(estate);
597 
598  /* We got all the data via replication, no need to evaluate anything. */
599  if (num_phys_attrs == rel->remoterel.natts)
600  return;
601 
 /* Sized for the worst case in which every local column needs a default. */
602  defmap = (int *) palloc(num_phys_attrs * sizeof(int));
603  defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
604 
605  Assert(rel->attrmap->maplen == num_phys_attrs);
606  for (attnum = 0; attnum < num_phys_attrs; attnum++)
607  {
608  Expr *defexpr;
609 
 /* Dropped and generated columns never receive a default here. */
610  if (TupleDescAttr(desc, attnum)->attisdropped || TupleDescAttr(desc, attnum)->attgenerated)
611  continue;
612 
 /* A non-negative map entry means the column is mapped to a remote column. */
613  if (rel->attrmap->attnums[attnum] >= 0)
614  continue;
615 
 /* attnum is 0-based here; build_column_default() is passed attnum + 1. */
616  defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1);
617 
618  if (defexpr != NULL)
619  {
620  /* Run the expression through planner */
621  defexpr = expression_planner(defexpr);
622 
623  /* Initialize an executable expression for this column's default */
624  defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
625  defmap[num_defaults] = attnum;
626  num_defaults++;
627  }
628  }
629 
 /* Evaluate each collected default straight into the slot's arrays. */
630  for (i = 0; i < num_defaults; i++)
631  slot->tts_values[defmap[i]] =
632  ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
633 }
634 
635 /*
636  * Store tuple data into slot.
637  *
638  * Incoming data can be either text or binary format.
 *
 * The slot is cleared first and ends up holding a virtual tuple. Each local
 * column that is not dropped and is mapped to a remote column is converted
 * with the type's text input or binary receive function according to its
 * colstatus; any other status is stored as NULL. Dropped or unmapped columns
 * are set to NULL.
 *
 * Raises ERROR (ERRCODE_INVALID_BINARY_REPRESENTATION) when a binary value is
 * not consumed completely by the receive function.
639  */
640 static void
642  LogicalRepTupleData *tupleData)
643 {
644  int natts = slot->tts_tupleDescriptor->natts;
645  int i;
646 
647  ExecClearTuple(slot);
648 
649  /* Call the "in" function for each non-dropped, non-null attribute */
650  Assert(natts == rel->attrmap->maplen);
651  for (i = 0; i < natts; i++)
652  {
 /* Index of the matching remote column; negative when there is none. */
654  int remoteattnum = rel->attrmap->attnums[i];
655 
656  if (!att->attisdropped && remoteattnum >= 0)
657  {
658  StringInfo colvalue = &tupleData->colvalues[remoteattnum];
659 
660  Assert(remoteattnum < tupleData->ncols);
661 
662  /* Set attnum for error callback */
664 
665  if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
666  {
667  Oid typinput;
668  Oid typioparam;
669 
 /* Text format: run the type's input function on the string. */
670  getTypeInputInfo(att->atttypid, &typinput, &typioparam);
671  slot->tts_values[i] =
672  OidInputFunctionCall(typinput, colvalue->data,
673  typioparam, att->atttypmod);
674  slot->tts_isnull[i] = false;
675  }
676  else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
677  {
678  Oid typreceive;
679  Oid typioparam;
680 
681  /*
682  * In some code paths we may be asked to re-parse the same
683  * tuple data. Reset the StringInfo's cursor so that works.
684  */
685  colvalue->cursor = 0;
686 
687  getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
688  slot->tts_values[i] =
689  OidReceiveFunctionCall(typreceive, colvalue,
690  typioparam, att->atttypmod);
691 
692  /* Trouble if it didn't eat the whole buffer */
693  if (colvalue->cursor != colvalue->len)
694  ereport(ERROR,
695  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
696  errmsg("incorrect binary data format in logical replication column %d",
697  remoteattnum + 1)));
698  slot->tts_isnull[i] = false;
699  }
700  else
701  {
702  /*
703  * NULL value from remote. (We don't expect to see
704  * LOGICALREP_COLUMN_UNCHANGED here, but if we do, treat it as
705  * NULL.)
706  */
707  slot->tts_values[i] = (Datum) 0;
708  slot->tts_isnull[i] = true;
709  }
710 
711  /* Reset attnum for error callback */
713  }
714  else
715  {
716  /*
717  * We assign NULL to dropped attributes and missing values
718  * (missing values should be later filled using
719  * slot_fill_defaults).
720  */
721  slot->tts_values[i] = (Datum) 0;
722  slot->tts_isnull[i] = true;
723  }
724  }
725 
 /* Mark the slot as containing a valid virtual tuple. */
726  ExecStoreVirtualTuple(slot);
727 }
728 
729 /*
730  * Replace updated columns with data from the LogicalRepTupleData struct.
731  * This is somewhat similar to heap_modify_tuple but also calls the type
732  * input functions on the user data.
733  *
734  * "slot" is filled with a copy of the tuple in "srcslot", replacing
735  * columns provided in "tupleData" and leaving others as-is.
736  *
737  * Caution: unreplaced pass-by-ref columns in "slot" will point into the
738  * storage for "srcslot". This is OK for current usage, but someday we may
739  * need to materialize "slot" at the end to make it independent of "srcslot".
 *
 * Columns with no remote counterpart, and remote columns whose status is
 * LOGICALREP_COLUMN_UNCHANGED, keep the value copied from "srcslot".
 *
 * Raises ERROR (ERRCODE_INVALID_BINARY_REPRESENTATION) when a binary value is
 * not consumed completely by the receive function.
740  */
741 static void
744  LogicalRepTupleData *tupleData)
745 {
746  int natts = slot->tts_tupleDescriptor->natts;
747  int i;
748 
749  /* We'll fill "slot" with a virtual tuple, so we must start with ... */
750  ExecClearTuple(slot);
751 
752  /*
753  * Copy all the column data from srcslot, so that we'll have valid values
754  * for unreplaced columns.
755  */
756  Assert(natts == srcslot->tts_tupleDescriptor->natts);
757  slot_getallattrs(srcslot);
758  memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
759  memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
760 
761  /* Call the "in" function for each replaced attribute */
762  Assert(natts == rel->attrmap->maplen);
763  for (i = 0; i < natts; i++)
764  {
 /* Index of the matching remote column; negative when there is none. */
766  int remoteattnum = rel->attrmap->attnums[i];
767 
 /* No remote counterpart: keep the value copied from srcslot. */
768  if (remoteattnum < 0)
769  continue;
770 
771  Assert(remoteattnum < tupleData->ncols);
772 
773  if (tupleData->colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
774  {
775  StringInfo colvalue = &tupleData->colvalues[remoteattnum];
776 
777  /* Set attnum for error callback */
779 
780  if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
781  {
782  Oid typinput;
783  Oid typioparam;
784 
 /* Text format: run the type's input function on the string. */
785  getTypeInputInfo(att->atttypid, &typinput, &typioparam);
786  slot->tts_values[i] =
787  OidInputFunctionCall(typinput, colvalue->data,
788  typioparam, att->atttypmod);
789  slot->tts_isnull[i] = false;
790  }
791  else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
792  {
793  Oid typreceive;
794  Oid typioparam;
795 
796  /*
797  * In some code paths we may be asked to re-parse the same
798  * tuple data. Reset the StringInfo's cursor so that works.
799  */
800  colvalue->cursor = 0;
801 
802  getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
803  slot->tts_values[i] =
804  OidReceiveFunctionCall(typreceive, colvalue,
805  typioparam, att->atttypmod);
806 
807  /* Trouble if it didn't eat the whole buffer */
808  if (colvalue->cursor != colvalue->len)
809  ereport(ERROR,
810  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
811  errmsg("incorrect binary data format in logical replication column %d",
812  remoteattnum + 1)));
813  slot->tts_isnull[i] = false;
814  }
815  else
816  {
817  /* must be LOGICALREP_COLUMN_NULL */
818  slot->tts_values[i] = (Datum) 0;
819  slot->tts_isnull[i] = true;
820  }
821 
822  /* Reset attnum for error callback */
824  }
825  }
826 
827  /* And finally, declare that "slot" contains a valid virtual tuple */
828  ExecStoreVirtualTuple(slot);
829 }
830 
831 /*
832  * Handle BEGIN message.
 *
 * Parses the message, records the transaction's XID and final LSN in the
 * apply error context, remembers the final LSN in remote_final_lsn, and marks
 * the worker as being inside a remote transaction.
833  */
834 static void
836 {
837  LogicalRepBeginData begin_data;
838 
839  logicalrep_read_begin(s, &begin_data);
840  set_apply_error_context_xact(begin_data.xid, begin_data.final_lsn);
841 
 /* Remembered so that later messages can be checked against it. */
842  remote_final_lsn = begin_data.final_lsn;
843 
845 
846  in_remote_transaction = true;
847 
849 }
850 
851 /*
852  * Handle COMMIT message.
853  *
854  * TODO, support tracking of multiple origins
 *
 * Raises ERROR (ERRCODE_PROTOCOL_VIOLATION) when the commit LSN in the
 * message differs from remote_final_lsn.
855  */
856 static void
858 {
859  LogicalRepCommitData commit_data;
860 
861  logicalrep_read_commit(s, &commit_data);
862 
 /* The commit LSN must equal the value stored in remote_final_lsn. */
863  if (commit_data.commit_lsn != remote_final_lsn)
864  ereport(ERROR,
865  (errcode(ERRCODE_PROTOCOL_VIOLATION),
866  errmsg_internal("incorrect commit LSN %X/%X in commit message (expected %X/%X)",
867  LSN_FORMAT_ARGS(commit_data.commit_lsn),
869 
870  apply_handle_commit_internal(&commit_data);
871 
872  /* Process any tables that are being synchronized in parallel. */
873  process_syncing_tables(commit_data.end_lsn);
874 
877 }
878 
879 /*
880  * Handle BEGIN PREPARE message.
 *
 * Raises ERROR (ERRCODE_PROTOCOL_VIOLATION) when called in a tablesync
 * worker. Otherwise parses the message, records the XID and prepare LSN in
 * the apply error context, remembers the prepare LSN in remote_final_lsn, and
 * marks the worker as being inside a remote transaction.
881  */
882 static void
884 {
885  LogicalRepPreparedTxnData begin_data;
886 
887  /* Tablesync should never receive prepare. */
888  if (am_tablesync_worker())
889  ereport(ERROR,
890  (errcode(ERRCODE_PROTOCOL_VIOLATION),
891  errmsg_internal("tablesync worker received a BEGIN PREPARE message")));
892 
893  logicalrep_read_begin_prepare(s, &begin_data);
894  set_apply_error_context_xact(begin_data.xid, begin_data.prepare_lsn);
895 
 /* Remembered so that the later PREPARE message can be checked against it. */
896  remote_final_lsn = begin_data.prepare_lsn;
897 
899 
900  in_remote_transaction = true;
901 
903 }
904 
905 /*
906  * Common function to prepare the GID.
 *
 * Besides computing the GID into a local GIDSIZE buffer, this sets
 * replorigin_session_origin_lsn to prepare_data->end_lsn.
907  */
908 static void
910 {
911  char gid[GIDSIZE];
912 
913  /*
914  * Compute unique GID for two_phase transactions. We don't use GID of
915  * prepared transaction sent by server as that can lead to deadlock when
916  * we have multiple subscriptions from same node point to publications on
917  * the same node. See comments atop worker.c
918  */
920  gid, sizeof(gid));
921 
922  /*
923  * BeginTransactionBlock is necessary to balance the EndTransactionBlock
924  * called within the PrepareTransactionBlock below.
925  */
927  CommitTransactionCommand(); /* Completes the preceding Begin command. */
928 
929  /*
930  * Update origin state so we can restart streaming from correct position
931  * in case of crash.
932  */
933  replorigin_session_origin_lsn = prepare_data->end_lsn;
935 
937 }
938 
939 /*
940  * Handle PREPARE message.
 *
 * Raises ERROR (ERRCODE_PROTOCOL_VIOLATION) when the prepare LSN in the
 * message differs from remote_final_lsn.
941  */
942 static void
944 {
945  LogicalRepPreparedTxnData prepare_data;
946 
947  logicalrep_read_prepare(s, &prepare_data);
948 
 /* The prepare LSN must equal the value stored in remote_final_lsn. */
949  if (prepare_data.prepare_lsn != remote_final_lsn)
950  ereport(ERROR,
951  (errcode(ERRCODE_PROTOCOL_VIOLATION),
952  errmsg_internal("incorrect prepare LSN %X/%X in prepare message (expected %X/%X)",
953  LSN_FORMAT_ARGS(prepare_data.prepare_lsn),
955 
956  /*
957  * Unlike commit, here, we always prepare the transaction even though no
958  * change has happened in this transaction or all changes are skipped. It
959  * is done this way because at commit prepared time, we won't know whether
960  * we have skipped preparing a transaction because of those reasons.
961  *
962  * XXX, We can optimize such that at commit prepared time, we first check
963  * whether we have prepared the transaction or not but that doesn't seem
964  * worthwhile because such cases shouldn't be common.
965  */
967 
968  apply_handle_prepare_internal(&prepare_data);
969 
972  pgstat_report_stat(false);
973 
 /* Record the end LSN of this prepare via store_flush_position(). */
974  store_flush_position(prepare_data.end_lsn);
975 
 /* The remote transaction is over from the apply worker's point of view. */
976  in_remote_transaction = false;
977 
978  /* Process any tables that are being synchronized in parallel. */
979  process_syncing_tables(prepare_data.end_lsn);
980 
981  /*
982  * Since we have already prepared the transaction, in a case where the
983  * server crashes before clearing the subskiplsn, it will be left but the
984  * transaction won't be resent. But that's okay because it's a rare case
985  * and the subskiplsn will be cleared when finishing the next transaction.
986  */
989 
992 }
993 
994 /*
995  * Handle a COMMIT PREPARED of a previously PREPARED transaction.
     *
     * Decodes the message from 's', sets replorigin_session_origin_lsn to the
     * message's end_lsn and finishes the prepared transaction named by the
     * locally computed 'gid' via FinishPreparedTransaction(gid, true).
     * Afterwards statistics are reported, the flush position for end_lsn is
     * stored, in_remote_transaction is cleared, process_syncing_tables() is
     * called and clear_subscription_skip_lsn() is invoked with end_lsn.
996  */
997 static void
999 {
      /* Despite the name, this holds the decoded COMMIT PREPARED message. */
1000  LogicalRepCommitPreparedTxnData prepare_data;
      /* Buffer of GIDSIZE bytes receiving the computed GID. */
1001  char gid[GIDSIZE];
1002 
1003  logicalrep_read_commit_prepared(s, &prepare_data);
1004  set_apply_error_context_xact(prepare_data.xid, prepare_data.commit_lsn);
1005 
1006  /* Compute GID for two_phase transactions. */
1008  gid, sizeof(gid));
1009 
1010  /* There is no transaction when COMMIT PREPARED is called */
1012 
1013  /*
1014  * Update origin state so we can restart streaming from correct position
1015  * in case of crash.
1016  */
1017  replorigin_session_origin_lsn = prepare_data.end_lsn;
1019 
      /*
       * 'true' is passed here; the ROLLBACK PREPARED handler in this file
       * passes 'false' to the same function.
       */
1020  FinishPreparedTransaction(gid, true);
1023  pgstat_report_stat(false);
1024 
1025  store_flush_position(prepare_data.end_lsn);
1026  in_remote_transaction = false;
1027 
1028  /* Process any tables that are being synchronized in parallel. */
1029  process_syncing_tables(prepare_data.end_lsn);
1030 
1031  clear_subscription_skip_lsn(prepare_data.end_lsn);
1032 
1035 }
1036 
1037 /*
1038  * Handle a ROLLBACK PREPARED of a previously PREPARED TRANSACTION.
      *
      * Decodes the message from 's' and derives the GID from the subscription
      * OID and the remote xid. The prepared transaction is only finished
      * (FinishPreparedTransaction(gid, false)) when LookupGXact() finds it for
      * the prepare_end_lsn and prepare_time carried in the message. Reporting
      * statistics, storing the flush position for rollback_end_lsn and clearing
      * in_remote_transaction happen whether or not it was found.
1039  */
1040 static void
1042 {
1043  LogicalRepRollbackPreparedTxnData rollback_data;
1044  char gid[GIDSIZE];
1045 
1046  logicalrep_read_rollback_prepared(s, &rollback_data);
1047  set_apply_error_context_xact(rollback_data.xid, rollback_data.rollback_end_lsn);
1048 
1049  /* Compute GID for two_phase transactions. */
1050  TwoPhaseTransactionGid(MySubscription->oid, rollback_data.xid,
1051  gid, sizeof(gid));
1052 
1053  /*
1054  * It is possible that we haven't received prepare because it occurred
1055  * before walsender reached a consistent point or the two_phase was still
1056  * not enabled by that time, so in such cases, we need to skip rollback
1057  * prepared.
1058  */
1059  if (LookupGXact(gid, rollback_data.prepare_end_lsn,
1060  rollback_data.prepare_time))
1061  {
1062  /*
1063  * Update origin state so we can restart streaming from correct
1064  * position in case of crash.
1065  */
1068 
1069  /* There is no transaction when ABORT/ROLLBACK PREPARED is called */
1071  FinishPreparedTransaction(gid, false);
1074 
1076  }
1077 
      /* The steps below run even when no matching prepared xact was found. */
1078  pgstat_report_stat(false);
1079 
1080  store_flush_position(rollback_data.rollback_end_lsn);
1081  in_remote_transaction = false;
1082 
1083  /* Process any tables that are being synchronized in parallel. */
1085 
1088 }
1089 
1090 /*
1091  * Handle STREAM PREPARE.
1092  *
1093  * Logic is in two parts:
1094  * 1. Replay all the spooled operations
1095  * 2. Mark the transaction as prepared
      *
      * Raises a protocol-violation error when called from a tablesync worker.
      * The message is decoded from 's' into a LogicalRepPreparedTxnData whose
      * xid and prepare_lsn are passed to apply_spooled_messages(). After
      * apply_handle_prepare_internal(), statistics are reported, the flush
      * position for end_lsn is stored, in_remote_transaction is cleared and
      * process_syncing_tables() is called with end_lsn.
1096  */
1097 static void
1099 {
1100  LogicalRepPreparedTxnData prepare_data;
1101 
1103  ereport(ERROR,
1104  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1105  errmsg_internal("STREAM PREPARE message without STREAM STOP")));
1106 
1107  /* Tablesync should never receive prepare. */
1108  if (am_tablesync_worker())
1109  ereport(ERROR,
1110  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1111  errmsg_internal("tablesync worker received a STREAM PREPARE message")));
1112 
1113  logicalrep_read_stream_prepare(s, &prepare_data);
1114  set_apply_error_context_xact(prepare_data.xid, prepare_data.prepare_lsn);
1115 
1116  elog(DEBUG1, "received prepare for streamed transaction %u", prepare_data.xid);
1117 
1118  /* Replay all the spooled operations. */
1119  apply_spooled_messages(prepare_data.xid, prepare_data.prepare_lsn);
1120 
1121  /* Mark the transaction as prepared. */
1122  apply_handle_prepare_internal(&prepare_data);
1123 
1125 
1126  pgstat_report_stat(false);
1127 
1128  store_flush_position(prepare_data.end_lsn);
1129 
1130  in_remote_transaction = false;
1131 
1132  /* unlink the files with serialized changes and subxact info. */
1134 
1135  /* Process any tables that are being synchronized in parallel. */
1136  process_syncing_tables(prepare_data.end_lsn);
1137 
1138  /*
1139  * Similar to prepare case, the subskiplsn could be left in a case of
1140  * server crash but it's okay. See the comments in apply_handle_prepare().
1141  */
1144 
1146 
1148 }
1149 
1150 /*
1151  * Handle ORIGIN message.
1152  *
1153  * TODO, support tracking of multiple origins
      *
      * The visible code only validates when the message arrives: if the
      * ordering check fails, a protocol-violation error ("ORIGIN message sent
      * out of order") is raised. Nothing else is done with the message here.
1154  */
1155 static void
1157 {
1158  /*
1159  * ORIGIN message can only come inside streaming transaction or inside
1160  * remote transaction and before any actual writes.
1161  */
      /* Being inside a streamed transaction always satisfies the check. */
1162  if (!in_streamed_transaction &&
1165  ereport(ERROR,
1166  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1167  errmsg_internal("ORIGIN message sent out of order")));
1168 }
1169 
1170 /*
1171  * Handle STREAM START message.
      *
      * Sets in_streamed_transaction, reads the top-level XID of the streamed
      * transaction from 's' into stream_xid (together with a flag telling
      * whether this is the first segment of that transaction), and makes sure
      * MyLogicalRepWorker->stream_fileset exists before the spool file is
      * opened. Protocol-violation errors are raised for a duplicate STREAM
      * START and for an invalid transaction ID.
1172  */
1173 static void
1175 {
      /* Set by logicalrep_read_stream_start(); false means a later segment. */
1176  bool first_segment;
1177 
1179  ereport(ERROR,
1180  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1181  errmsg_internal("duplicate STREAM START message")));
1182 
1183  /*
1184  * Start a transaction on stream start, this transaction will be committed
1185  * on the stream stop unless it is a tablesync worker in which case it
1186  * will be committed after processing all the messages. We need the
1187  * transaction for handling the buffile, used for serializing the
1188  * streaming data and subxact info.
1189  */
1191 
1192  /* notify handle methods we're processing a remote transaction */
1193  in_streamed_transaction = true;
1194 
1195  /* extract XID of the top-level transaction */
1196  stream_xid = logicalrep_read_stream_start(s, &first_segment);
1197 
1199  ereport(ERROR,
1200  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1201  errmsg_internal("invalid transaction ID in streamed replication transaction")));
1202 
1204 
1205  /*
1206  * Initialize the worker's stream_fileset if we haven't yet. This will be
1207  * used for the entire duration of the worker so create it in a permanent
1208  * context. We create this on the very first streaming message from any
1209  * transaction and then use it for this and other streaming transactions.
1210  * Now, we could create a fileset at the start of the worker as well but
1211  * then we won't be sure that it will ever be used.
1212  */
1213  if (MyLogicalRepWorker->stream_fileset == NULL)
1214  {
      /* Context to return to once the fileset has been set up. */
1215  MemoryContext oldctx;
1216 
1218 
1221 
1222  MemoryContextSwitchTo(oldctx);
1223  }
1224 
1225  /* open the spool file for this transaction */
1227 
1228  /* if this is not the first segment, open existing subxact file */
1229  if (!first_segment)
1231 
1233 
1235 }
1236 
1237 /*
1238  * Handle STREAM STOP message.
      *
      * Ends the current stream segment: a protocol-violation error is raised
      * when the message arrives without a preceding STREAM START; otherwise
      * the steps described by the comments below are carried out and
      * in_streamed_transaction is reset to false.
1239  */
1240 static void
1242 {
1244  ereport(ERROR,
1245  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1246  errmsg_internal("STREAM STOP message without STREAM START")));
1247 
1248  /*
1249  * Close the file with serialized changes, and serialize information about
1250  * subxacts for the toplevel transaction.
1251  */
1254 
1255  /* We must be in a valid transaction state */
1257 
1258  /* Commit the per-stream transaction */
1260 
      /* Later messages are no longer treated as part of a stream segment. */
1261  in_streamed_transaction = false;
1262 
1263  /* Reset per-stream context */
1265 
1268 }
1269 
1270 /*
1271  * Handle STREAM abort message.
      *
      * Reads the top-level XID and the aborted (sub)transaction XID from 's'.
      * If both are equal the whole streamed transaction is aborted. Otherwise
      * the subxact is looked up in subxact_data.subxacts; when found, the
      * changes file is truncated at the offset recorded for that subxact and
      * the subxact list is shortened so that this entry and all later ones
      * are dropped. When it is not found the function returns early.
1272  */
1273 static void
1275 {
1276  TransactionId xid;
1277  TransactionId subxid;
1278 
1280  ereport(ERROR,
1281  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1282  errmsg_internal("STREAM ABORT message without STREAM STOP")));
1283 
1284  logicalrep_read_stream_abort(s, &xid, &subxid);
1285 
1286  /*
1287  * If the two XIDs are the same, it's in fact abort of toplevel xact, so
1288  * just delete the files with serialized info.
1289  */
1290  if (xid == subxid)
1291  {
1294  }
1295  else
1296  {
1297  /*
1298  * OK, so it's a subxact. We need to read the subxact file for the
1299  * toplevel transaction, determine the offset tracked for the subxact,
1300  * and truncate the file with changes. We also remove the subxacts
1301  * with higher offsets (or rather higher XIDs).
1302  *
1303  * We intentionally scan the array from the tail, because we're likely
1304  * aborting a change for the most recent subtransactions.
1305  *
1306  * We can't use the binary search here as subxact XIDs won't
1307  * necessarily arrive in sorted order; consider the case where we have
1308  * released the savepoint for multiple subtransactions and then
1309  * performed rollback to savepoint for one of the earlier
1310  * sub-transaction.
1311  */
1312  int64 i;
      /* Index of the matching entry in subxact_data.subxacts, -1 if none. */
1313  int64 subidx;
1314  BufFile *fd;
1315  bool found = false;
1316  char path[MAXPGPATH];
1317 
1319 
1320  subidx = -1;
1323 
      /* i runs from nsubxacts down to 1, so entry i - 1 is inspected. */
1324  for (i = subxact_data.nsubxacts; i > 0; i--)
1325  {
1326  if (subxact_data.subxacts[i - 1].xid == subxid)
1327  {
1328  subidx = (i - 1);
1329  found = true;
1330  break;
1331  }
1332  }
1333 
1334  /*
1335  * If it's an empty sub-transaction then we will not find the subxid
1336  * here so just cleanup the subxact info and return.
1337  */
1338  if (!found)
1339  {
1340  /* Cleanup the subxact info */
1345  return;
1346  }
1347 
1348  /* open the changes file */
1351  O_RDWR, false);
1352 
1353  /* OK, truncate the file at the right offset */
1355  subxact_data.subxacts[subidx].offset);
1356  BufFileClose(fd);
1357 
1358  /* discard the subxacts added later */
      /* Using subidx as the new count also drops the aborted entry itself. */
1359  subxact_data.nsubxacts = subidx;
1360 
1361  /* write the updated subxact list */
1363 
1366  }
1367 
1369 }
1370 
1371 /*
1372  * Common spoolfile processing.
      *
      * Replays the changes stored in a streamed transaction's changes file.
      * Each on-disk record is read as an int length followed by that many
      * bytes of message data; the data is copied into a StringInfo and handed
      * to apply_dispatch(). Reading stops when no further length can be read.
      * A short read or a non-positive length raises an error.
      *
      * 'lsn' is stored in remote_final_lsn before replay starts, and
      * in_remote_transaction is set to true for the duration of the replay.
1373  */
1374 static void
1376 {
      /* Number of records replayed so far; used for debug logging only. */
1378  int nchanges;
1379  char path[MAXPGPATH];
      /* Scratch buffer holding the raw bytes of the current record. */
1380  char *buffer = NULL;
1381  MemoryContext oldcxt;
1382  BufFile *fd;
1383 
1385 
1386  /* Make sure we have an open transaction */
1388 
1389  /*
1390  * Allocate file handle and memory required to process all the messages in
1391  * TopTransactionContext to avoid them getting reset after each message is
1392  * processed.
1393  */
1395 
1396  /* Open the spool file for the committed/prepared transaction */
1398  elog(DEBUG1, "replaying changes from file \"%s\"", path);
1399 
1401  false);
1402 
      /* Initial size only; the buffer is resized per record further down. */
1403  buffer = palloc(BLCKSZ);
1404  initStringInfo(&s2);
1405 
1406  MemoryContextSwitchTo(oldcxt);
1407 
1408  remote_final_lsn = lsn;
1409 
1410  /*
1411  * Make sure the apply_dispatch handler methods are aware we're in a remote
1412  * transaction.
1413  */
1414  in_remote_transaction = true;
1416 
1418 
1419  /*
1420  * Read the entries one by one and pass them through the same logic as in
1421  * apply_dispatch.
1422  */
1423  nchanges = 0;
1424  while (true)
1425  {
1426  int nbytes;
      /* Length prefix of the current record, as stored in the file. */
1427  int len;
1428 
1430 
1431  /* read length of the on-disk record */
1432  nbytes = BufFileRead(fd, &len, sizeof(len));
1433 
1434  /* have we reached end of the file? */
1435  if (nbytes == 0)
1436  break;
1437 
1438  /* do we have a correct length? */
1439  if (nbytes != sizeof(len))
1440  ereport(ERROR,
1442  errmsg("could not read from streaming transaction's changes file \"%s\": %m",
1443  path)));
1444 
1445  if (len <= 0)
1446  elog(ERROR, "incorrect length %d in streaming transaction's changes file \"%s\"",
1447  len, path);
1448 
1449  /* make sure we have sufficiently large buffer */
      /* The buffer is resized to exactly len bytes for every record. */
1450  buffer = repalloc(buffer, len);
1451 
1452  /* and finally read the data into the buffer */
1453  if (BufFileRead(fd, buffer, len) != len)
1454  ereport(ERROR,
1456  errmsg("could not read from streaming transaction's changes file \"%s\": %m",
1457  path)));
1458 
1459  /* copy the buffer to the stringinfo and call apply_dispatch */
1460  resetStringInfo(&s2);
1461  appendBinaryStringInfo(&s2, buffer, len);
1462 
1463  /* Ensure we are reading the data into our memory context. */
1465 
1466  apply_dispatch(&s2);
1467 
1469 
1470  MemoryContextSwitchTo(oldcxt);
1471 
1472  nchanges++;
1473 
      /* Emit a progress message every 1000 replayed changes. */
1474  if (nchanges % 1000 == 0)
1475  elog(DEBUG1, "replayed %d changes from file \"%s\"",
1476  nchanges, path);
1477  }
1478 
1479  BufFileClose(fd);
1480 
      /* Release the scratch buffer and the StringInfo's storage. */
1481  pfree(buffer);
1482  pfree(s2.data);
1483 
1484  elog(DEBUG1, "replayed %d (all) changes from file \"%s\"",
1485  nchanges, path);
1486 
1487  return;
1488 }
1489 
1490 /*
1491  * Handle STREAM COMMIT message.
      *
      * Reads the XID and commit data of the streamed transaction from 's',
      * replays its spooled changes with apply_spooled_messages() using the
      * commit LSN, finishes the commit through apply_handle_commit_internal()
      * and then calls process_syncing_tables() with end_lsn.
1492  */
1493 static void
1495 {
1496  TransactionId xid;
1497  LogicalRepCommitData commit_data;
1498 
1500  ereport(ERROR,
1501  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1502  errmsg_internal("STREAM COMMIT message without STREAM STOP")));
1503 
1504  xid = logicalrep_read_stream_commit(s, &commit_data);
1505  set_apply_error_context_xact(xid, commit_data.commit_lsn);
1506 
1507  elog(DEBUG1, "received commit for streamed transaction %u", xid);
1508 
      /* Replay everything spooled for this XID. */
1509  apply_spooled_messages(xid, commit_data.commit_lsn);
1510 
1511  apply_handle_commit_internal(&commit_data);
1512 
1513  /* unlink the files with serialized changes and subxact info */
1515 
1516  /* Process any tables that are being synchronized in parallel. */
1517  process_syncing_tables(commit_data.end_lsn);
1518 
1520 
1522 }
1523 
1524 /*
1525  * Helper function for apply_handle_commit and apply_handle_stream_commit.
      *
      * 'commit_data' supplies the end_lsn used below. When a transaction is
      * open (IsTransactionState()), replorigin_session_origin_lsn is set to
      * end_lsn, statistics are reported and the flush position for end_lsn is
      * stored. in_remote_transaction is reset to false on every path.
1526  */
1527 static void
1529 {
1530  if (is_skipping_changes())
1531  {
1533 
1534  /*
1535  * Start a new transaction to clear the subskiplsn, if not started
1536  * yet.
1537  */
1538  if (!IsTransactionState())
1540  }
1541 
1542  if (IsTransactionState())
1543  {
1544  /*
1545  * The transaction is either non-empty or skipped, so we clear the
1546  * subskiplsn.
1547  */
1549 
1550  /*
1551  * Update origin state so we can restart streaming from correct
1552  * position in case of crash.
1553  */
1554  replorigin_session_origin_lsn = commit_data->end_lsn;
1556 
1558  pgstat_report_stat(false);
1559 
1560  store_flush_position(commit_data->end_lsn);
1561  }
1562  else
1563  {
1564  /* Process any invalidation messages that might have accumulated. */
1567  }
1568 
      /* Done with the remote transaction regardless of the branch taken. */
1569  in_remote_transaction = false;
1570 }
1571 
1572 /*
1573  * Handle RELATION message.
1574  *
1575  * Note we don't do validation against local schema here. The validation
1576  * against local schema is postponed until first change for given relation
1577  * comes as we only care about it when applying changes for it anyway and we
1578  * do less locking this way.
      *
      * Unless the early return below is taken, the relation description is
      * decoded from 's' with logicalrep_read_rel().
1579  */
1580 static void
1582 {
      /* Relation description decoded from the message. */
1583  LogicalRepRelation *rel;
1584 
      /* Early exit: on this path nothing is read from the message here. */
1586  return;
1587 
1588  rel = logicalrep_read_rel(s);
1590 
1591  /* Also reset all entries in the partition map that refer to remoterel. */
1593 }
1594 
1595 /*
1596  * Handle TYPE message.
1597  *
1598  * This implementation pays no attention to TYPE messages; we expect the user
1599  * to have set things up so that the incoming data is acceptable to the input
1600  * functions for the locally subscribed tables. Hence, we just read and
1601  * discard the message.
1602  */
1603 static void
1605 {
      /* Receives the decoded message; its contents are not used afterwards. */
1606  LogicalRepTyp typ;
1607 
      /* Early exit: on this path the message is not read here. */
1609  return;
1610 
1611  logicalrep_read_typ(s, &typ);
1612 }
1613 
1614 /*
1615  * Get replica identity index or if it is not defined a primary key.
1616  *
1617  * If neither is defined, returns InvalidOid
      *
      * 'rel' is the relation to inspect. The replica identity index is looked
      * up first (RelationGetReplicaIndex()); the primary key index
      * (RelationGetPrimaryKeyIndex()) is consulted only when that result is
      * not a valid OID.
1618  */
1619 static Oid
1621 {
1622  Oid idxoid;
1623 
1624  idxoid = RelationGetReplicaIndex(rel);
1625 
      /* Fall back to the primary key index when there is no replica index. */
1626  if (!OidIsValid(idxoid))
1627  idxoid = RelationGetPrimaryKeyIndex(rel);
1628 
1629  return idxoid;
1630 }
1631 
1632 /*
1633  * Check that we (the subscription owner) have sufficient privileges on the
1634  * target relation to perform the given operation.
      *
      * 'rel' is the target relation and 'mode' the privilege to check; the
      * check is made for the user returned by GetUserId(). If
      * pg_class_aclcheck() does not return ACLCHECK_OK, the failure is
      * reported through aclcheck_error(). Independently of 'mode', an error is
      * raised when check_enable_rls() reports RLS_ENABLED for the relation.
1635  */
1636 static void
1638 {
1639  Oid relid;
1640  AclResult aclresult;
1641 
1642  relid = RelationGetRelid(rel);
1643  aclresult = pg_class_aclcheck(relid, GetUserId(), mode);
1644  if (aclresult != ACLCHECK_OK)
1645  aclcheck_error(aclresult,
1646  get_relkind_objtype(rel->rd_rel->relkind),
1647  get_rel_name(relid));
1648 
1649  /*
1650  * We lack the infrastructure to honor RLS policies. It might be possible
1651  * to add such infrastructure here, but tablesync workers lack it, too, so
1652  * we don't bother. RLS does not ordinarily apply to TRUNCATE commands,
1653  * but it seems dangerous to replicate a TRUNCATE and then refuse to
1654  * replicate subsequent INSERTs, so we forbid all commands the same.
1655  */
1656  if (check_enable_rls(relid, InvalidOid, false) == RLS_ENABLED)
1657  ereport(ERROR,
1658  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1659  errmsg("user \"%s\" cannot replicate into relation with row-level security enabled: \"%s\"",
1660  GetUserNameFromId(GetUserId(), true),
1661  RelationGetRelationName(rel))));
1662 }
1663 
1664 /*
1665  * Handle INSERT message.
      *
      * Reads the relation id and the new tuple from 's', opens the mapped
      * relation with RowExclusiveLock and returns early if
      * should_apply_changes_for_rel() rejects it. Otherwise an executor state
      * is created, the remote tuple is stored in a virtual slot and filled up
      * with defaults (slot_fill_defaults()), and the insert is carried out;
      * partitioned target tables take a separate branch from plain ones.
1666  */
1667 
1668 static void
1670 {
1671  LogicalRepRelMapEntry *rel;
1672  LogicalRepTupleData newtup;
1673  LogicalRepRelId relid;
1674  ApplyExecutionData *edata;
1675  EState *estate;
      /* Virtual slot holding the tuple received from the publisher. */
1676  TupleTableSlot *remoteslot;
1677  MemoryContext oldctx;
1678 
1679  /*
1680  * Quick return if we are skipping data modification changes or handling
1681  * streamed transactions.
1682  */
1683  if (is_skipping_changes() ||
1685  return;
1686 
1688 
1689  relid = logicalrep_read_insert(s, &newtup);
1690  rel = logicalrep_rel_open(relid, RowExclusiveLock);
1691  if (!should_apply_changes_for_rel(rel))
1692  {
1693  /*
1694  * The relation can't become interesting in the middle of the
1695  * transaction so it's safe to unlock it.
1696  */
1699  return;
1700  }
1701 
1702  /* Set relation for error callback */
1704 
1705  /* Initialize the executor state. */
1706  edata = create_edata_for_relation(rel);
1707  estate = edata->estate;
1708  remoteslot = ExecInitExtraTupleSlot(estate,
1709  RelationGetDescr(rel->localrel),
1710  &TTSOpsVirtual);
1711 
1712  /* Process and store remote tuple in the slot */
1714  slot_store_data(remoteslot, rel, &newtup);
1715  slot_fill_defaults(rel, estate, remoteslot);
1716  MemoryContextSwitchTo(oldctx);
1717 
1718  /* For a partitioned table, insert the tuple into a partition. */
1719  if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1721  remoteslot, NULL, CMD_INSERT);
1722  else
1724  remoteslot);
1725 
1726  finish_edata(edata);
1727 
1728  /* Reset relation for error callback */
1730 
1732 
1734 }
1735 
1736 /*
1737  * Workhorse for apply_handle_insert()
1738  * relinfo is for the relation we're actually inserting into
1739  * (could be a child partition of edata->targetRelInfo)
      *
      * 'edata' provides the executor state (edata->estate) and 'remoteslot'
      * holds the tuple to insert. The indexes of 'relinfo' are opened before
      * ExecSimpleRelationInsert() is called and closed again afterwards.
1740  */
1741 static void
1743  ResultRelInfo *relinfo,
1744  TupleTableSlot *remoteslot)
1745 {
1746  EState *estate = edata->estate;
1747 
1748  /* We must open indexes here. */
1749  ExecOpenIndices(relinfo, false);
1750 
1751  /* Do the insert. */
1753  ExecSimpleRelationInsert(relinfo, estate, remoteslot);
1754 
1755  /* Cleanup. */
1756  ExecCloseIndices(relinfo);
1757 }
1758 
1759 /*
1760  * Check if the logical replication relation is updatable and throw
1761  * appropriate error if it isn't.
      *
      * 'rel' is the relation map entry to check. The function returns without
      * error for partitioned local tables and for entries whose 'updatable'
      * flag is set. Otherwise one of two errors with
      * ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE is raised, naming the remote
      * relation as "nspname.relname".
1762  */
1763 static void
1765 {
1766  /*
1767  * For partitioned tables, we only need to care if the target partition is
1768  * updatable (aka has PK or RI defined for it).
1769  */
1770  if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1771  return;
1772 
1773  /* Updatable, no error. */
1774  if (rel->updatable)
1775  return;
1776 
1777  /*
1778  * We are in error mode so it's fine this is somewhat slow. It's better to
1779  * give user correct error.
1780  */
1782  {
1783  ereport(ERROR,
1784  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1785  errmsg("publisher did not send replica identity column "
1786  "expected by the logical replication target relation \"%s.%s\"",
1787  rel->remoterel.nspname, rel->remoterel.relname)));
1788  }
1789 
      /* Reached only when the more specific error above was not raised. */
1790  ereport(ERROR,
1791  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1792  errmsg("logical replication target relation \"%s.%s\" has "
1793  "neither REPLICA IDENTITY index nor PRIMARY "
1794  "KEY and published relation does not have "
1795  "REPLICA IDENTITY FULL",
1796  rel->remoterel.nspname, rel->remoterel.relname)));
1797 }
1798 
1799 /*
1800  * Handle UPDATE message.
1801  *
1802  * TODO: FDW support
      *
      * Reads the relation id, an optional old tuple and the new tuple from
      * 's', opens the mapped relation with RowExclusiveLock and returns early
      * if should_apply_changes_for_rel() rejects it. The search tuple stored
      * in 'remoteslot' is the old tuple when the message carried one and the
      * new tuple otherwise. Partitioned target tables take a separate branch
      * from plain ones.
1803  */
1804 static void
1806 {
1807  LogicalRepRelMapEntry *rel;
1808  LogicalRepRelId relid;
1809  ApplyExecutionData *edata;
1810  EState *estate;
1811  LogicalRepTupleData oldtup;
1812  LogicalRepTupleData newtup;
      /* Set by logicalrep_read_update(): did the message carry an old tuple? */
1813  bool has_oldtup;
1814  TupleTableSlot *remoteslot;
1815  RangeTblEntry *target_rte;
1816  MemoryContext oldctx;
1817 
1818  /*
1819  * Quick return if we are skipping data modification changes or handling
1820  * streamed transactions.
1821  */
1822  if (is_skipping_changes() ||
1824  return;
1825 
1827 
1828  relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
1829  &newtup);
1830  rel = logicalrep_rel_open(relid, RowExclusiveLock);
1831  if (!should_apply_changes_for_rel(rel))
1832  {
1833  /*
1834  * The relation can't become interesting in the middle of the
1835  * transaction so it's safe to unlock it.
1836  */
1839  return;
1840  }
1841 
1842  /* Set relation for error callback */
1844 
1845  /* Check if we can do the update. */
1847 
1848  /* Initialize the executor state. */
1849  edata = create_edata_for_relation(rel);
1850  estate = edata->estate;
1851  remoteslot = ExecInitExtraTupleSlot(estate,
1852  RelationGetDescr(rel->localrel),
1853  &TTSOpsVirtual);
1854 
1855  /*
1856  * Populate updatedCols so that per-column triggers can fire, and so
1857  * executor can correctly pass down indexUnchanged hint. This could
1858  * include more columns than were actually changed on the publisher
1859  * because the logical replication protocol doesn't contain that
1860  * information. But it would for example exclude columns that only exist
1861  * on the subscriber, since we are not touching those.
1862  */
      /* The target relation is the first entry of the range table. */
1863  target_rte = list_nth(estate->es_range_table, 0);
1864  for (int i = 0; i < remoteslot->tts_tupleDescriptor->natts; i++)
1865  {
      /* Remote column number mapped to local attribute i. */
1867  int remoteattnum = rel->attrmap->attnums[i];
1868 
      /* Skip dropped columns and those with a negative remote number. */
1869  if (!att->attisdropped && remoteattnum >= 0)
1870  {
1871  Assert(remoteattnum < newtup.ncols);
1872  if (newtup.colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
1873  target_rte->updatedCols =
1874  bms_add_member(target_rte->updatedCols,
1876  }
1877  }
1878 
1879  /* Also populate extraUpdatedCols, in case we have generated columns */
1880  fill_extraUpdatedCols(target_rte, rel->localrel);
1881 
1882  /* Build the search tuple. */
1884  slot_store_data(remoteslot, rel,
1885  has_oldtup ? &oldtup : &newtup);
1886  MemoryContextSwitchTo(oldctx);
1887 
1888  /* For a partitioned table, apply update to correct partition. */
1889  if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1891  remoteslot, &newtup, CMD_UPDATE);
1892  else
1894  remoteslot, &newtup);
1895 
1896  finish_edata(edata);
1897 
1898  /* Reset relation for error callback */
1900 
1902 
1904 }
1905 
1906 /*
1907  * Workhorse for apply_handle_update()
1908  * relinfo is for the relation we're actually updating in
1909  * (could be a child partition of edata->targetRelInfo)
      *
      * 'remoteslot' holds the search tuple on entry. The matching local tuple
      * is looked up with FindReplTupleInLocalRel(); 'remoteslot' is then
      * cleared and, if a match was found, reused for the updated tuple built
      * by slot_modify_data() from the local tuple and 'newtup'. When no match
      * is found only a DEBUG1 message is emitted.
1910  */
1911 static void
1913  ResultRelInfo *relinfo,
1914  TupleTableSlot *remoteslot,
1915  LogicalRepTupleData *newtup)
1916 {
1917  EState *estate = edata->estate;
1918  LogicalRepRelMapEntry *relmapentry = edata->targetRel;
1919  Relation localrel = relinfo->ri_RelationDesc;
1920  EPQState epqstate;
      /* Receives the local tuple located by FindReplTupleInLocalRel(). */
1921  TupleTableSlot *localslot;
1922  bool found;
1923  MemoryContext oldctx;
1924 
1925  EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
1926  ExecOpenIndices(relinfo, false);
1927 
1928  found = FindReplTupleInLocalRel(estate, localrel,
1929  &relmapentry->remoterel,
1930  remoteslot, &localslot);
      /* The search tuple is no longer needed; the slot is reused below. */
1931  ExecClearTuple(remoteslot);
1932 
1933  /*
1934  * Tuple found.
1935  *
1936  * Note this will fail if there are other conflicting unique indexes.
1937  */
1938  if (found)
1939  {
1940  /* Process and store remote tuple in the slot */
1942  slot_modify_data(remoteslot, localslot, relmapentry, newtup);
1943  MemoryContextSwitchTo(oldctx);
1944 
1945  EvalPlanQualSetSlot(&epqstate, remoteslot);
1946 
1947  /* Do the actual update. */
1949  ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
1950  remoteslot);
1951  }
1952  else
1953  {
1954  /*
1955  * The tuple to be updated could not be found. Do nothing except for
1956  * emitting a log message.
1957  *
1958  * XXX should this be promoted to ereport(LOG) perhaps?
1959  */
1960  elog(DEBUG1,
1961  "logical replication did not find row to be updated "
1962  "in replication target relation \"%s\"",
1963  RelationGetRelationName(localrel));
1964  }
1965 
1966  /* Cleanup. */
1967  ExecCloseIndices(relinfo);
1968  EvalPlanQualEnd(&epqstate);
1969 }
1970 
1971 /*
1972  * Handle DELETE message.
1973  *
1974  * TODO: FDW support
      *
      * Reads the relation id and the old tuple from 's', opens the mapped
      * relation with RowExclusiveLock and returns early if
      * should_apply_changes_for_rel() rejects it. The old tuple is stored in a
      * virtual slot as the search tuple; partitioned target tables take a
      * separate branch from plain ones.
1975  */
1976 static void
1978 {
1979  LogicalRepRelMapEntry *rel;
1980  LogicalRepTupleData oldtup;
1981  LogicalRepRelId relid;
1982  ApplyExecutionData *edata;
1983  EState *estate;
      /* Virtual slot holding the search tuple built from oldtup. */
1984  TupleTableSlot *remoteslot;
1985  MemoryContext oldctx;
1986 
1987  /*
1988  * Quick return if we are skipping data modification changes or handling
1989  * streamed transactions.
1990  */
1991  if (is_skipping_changes() ||
1993  return;
1994 
1996 
1997  relid = logicalrep_read_delete(s, &oldtup);
1998  rel = logicalrep_rel_open(relid, RowExclusiveLock);
1999  if (!should_apply_changes_for_rel(rel))
2000  {
2001  /*
2002  * The relation can't become interesting in the middle of the
2003  * transaction so it's safe to unlock it.
2004  */
2007  return;
2008  }
2009 
2010  /* Set relation for error callback */
2012 
2013  /* Check if we can do the delete. */
2015 
2016  /* Initialize the executor state. */
2017  edata = create_edata_for_relation(rel);
2018  estate = edata->estate;
2019  remoteslot = ExecInitExtraTupleSlot(estate,
2020  RelationGetDescr(rel->localrel),
2021  &TTSOpsVirtual);
2022 
2023  /* Build the search tuple. */
2025  slot_store_data(remoteslot, rel, &oldtup);
2026  MemoryContextSwitchTo(oldctx);
2027 
2028  /* For a partitioned table, apply delete to correct partition. */
2029  if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2031  remoteslot, NULL, CMD_DELETE);
2032  else
2034  remoteslot);
2035 
2036  finish_edata(edata);
2037 
2038  /* Reset relation for error callback */
2040 
2042 
2044 }
2045 
2046 /*
2047  * Workhorse for apply_handle_delete()
2048  * relinfo is for the relation we're actually deleting from
2049  * (could be a child partition of edata->targetRelInfo)
      *
      * 'remoteslot' holds the search tuple. The matching local tuple is looked
      * up with FindReplTupleInLocalRel() and, when found, removed with
      * ExecSimpleRelationDelete(). When no match is found only a DEBUG1
      * message is emitted. Indexes are opened before the lookup and closed
      * again at the end.
2050  */
2051 static void
2053  ResultRelInfo *relinfo,
2054  TupleTableSlot *remoteslot)
2055 {
2056  EState *estate = edata->estate;
2057  Relation localrel = relinfo->ri_RelationDesc;
2058  LogicalRepRelation *remoterel = &edata->targetRel->remoterel;
2059  EPQState epqstate;
      /* Receives the local tuple located by FindReplTupleInLocalRel(). */
2060  TupleTableSlot *localslot;
2061  bool found;
2062 
2063  EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
2064  ExecOpenIndices(relinfo, false);
2065 
2066  found = FindReplTupleInLocalRel(estate, localrel, remoterel,
2067  remoteslot, &localslot);
2068 
2069  /* If found delete it. */
2070  if (found)
2071  {
2072  EvalPlanQualSetSlot(&epqstate, localslot);
2073 
2074  /* Do the actual delete. */
2076  ExecSimpleRelationDelete(relinfo, estate, &epqstate, localslot);
2077  }
2078  else
2079  {
2080  /*
2081  * The tuple to be deleted could not be found. Do nothing except for
2082  * emitting a log message.
2083  *
2084  * XXX should this be promoted to ereport(LOG) perhaps?
2085  */
2086  elog(DEBUG1,
2087  "logical replication did not find row to be deleted "
2088  "in replication target relation \"%s\"",
2089  RelationGetRelationName(localrel));
2090  }
2091 
2092  /* Cleanup. */
2093  ExecCloseIndices(relinfo);
2094  EvalPlanQualEnd(&epqstate);
2095 }
2096 
2097 /*
2098  * Try to find a tuple received from the publication side (in 'remoteslot') in
2099  * the corresponding local relation using either replica identity index,
2100  * primary key or if needed, sequential scan.
2101  *
2102  * Local tuple, if found, is returned in '*localslot'.
      *
      * '*localslot' is always set to a slot newly created for 'localrel' and
      * registered in estate->es_tupleTable, whether or not a tuple is found.
      * SELECT privilege on 'localrel' is checked first. Returns the result of
      * the index-based or sequential lookup.
2103  */
2104 static bool
2106  LogicalRepRelation *remoterel,
2107  TupleTableSlot *remoteslot,
2108  TupleTableSlot **localslot)
2109 {
      /* Replica identity or primary key index, or an invalid OID if neither. */
2110  Oid idxoid;
2111  bool found;
2112 
2113  /*
2114  * Regardless of the top-level operation, we're performing a read here, so
2115  * check for SELECT privileges.
2116  */
2117  TargetPrivilegesCheck(localrel, ACL_SELECT);
2118 
2119  *localslot = table_slot_create(localrel, &estate->es_tupleTable);
2120 
2121  idxoid = GetRelationIdentityOrPK(localrel);
      /* Without a usable index the remote side must use REPLICA IDENTITY FULL. */
2122  Assert(OidIsValid(idxoid) ||
2123  (remoterel->replident == REPLICA_IDENTITY_FULL));
2124 
2125  if (OidIsValid(idxoid))
2126  found = RelationFindReplTupleByIndex(localrel, idxoid,
2128  remoteslot, *localslot);
2129  else
2130  found = RelationFindReplTupleSeq(localrel, LockTupleExclusive,
2131  remoteslot, *localslot);
2132 
2133  return found;
2134 }
2135 
2136 /*
2137  * This handles insert, update, delete on a partitioned table.
2138  */
2139 static void
2141  TupleTableSlot *remoteslot,
2142  LogicalRepTupleData *newtup,
2143  CmdType operation)
2144 {
2145  EState *estate = edata->estate;
2146  LogicalRepRelMapEntry *relmapentry = edata->targetRel;
2147  ResultRelInfo *relinfo = edata->targetRelInfo;
2148  Relation parentrel = relinfo->ri_RelationDesc;
2149  ModifyTableState *mtstate;
2150  PartitionTupleRouting *proute;
2151  ResultRelInfo *partrelinfo;
2152  Relation partrel;
2153  TupleTableSlot *remoteslot_part;
2154  TupleConversionMap *map;
2155  MemoryContext oldctx;
2156  LogicalRepRelMapEntry *part_entry = NULL;
2157  AttrMap *attrmap = NULL;
2158 
2159  /* ModifyTableState is needed for ExecFindPartition(). */
2160  edata->mtstate = mtstate = makeNode(ModifyTableState);
2161  mtstate->ps.plan = NULL;
2162  mtstate->ps.state = estate;
2163  mtstate->operation = operation;
2164  mtstate->resultRelInfo = relinfo;
2165 
2166  /* ... as is PartitionTupleRouting. */
2167  edata->proute = proute = ExecSetupPartitionTupleRouting(estate, parentrel);
2168 
2169  /*
2170  * Find the partition to which the "search tuple" belongs.
2171  */
2172  Assert(remoteslot != NULL);
2174  partrelinfo = ExecFindPartition(mtstate, relinfo, proute,
2175  remoteslot, estate);
2176  Assert(partrelinfo != NULL);
2177  partrel = partrelinfo->ri_RelationDesc;
2178 
2179  /*
2180  * Check for supported relkind. We need this since partitions might be of
2181  * unsupported relkinds; and the set of partitions can change, so checking
2182  * at CREATE/ALTER SUBSCRIPTION would be insufficient.
2183  */
2184  CheckSubscriptionRelkind(partrel->rd_rel->relkind,
2186  RelationGetRelationName(partrel));
2187 
2188  /*
2189  * To perform any of the operations below, the tuple must match the
2190  * partition's rowtype. Convert if needed or just copy, using a dedicated
2191  * slot to store the tuple in any case.
2192  */
2193  remoteslot_part = partrelinfo->ri_PartitionTupleSlot;
2194  if (remoteslot_part == NULL)
2195  remoteslot_part = table_slot_create(partrel, &estate->es_tupleTable);
2196  map = ExecGetRootToChildMap(partrelinfo, estate);
2197  if (map != NULL)
2198  {
2199  attrmap = map->attrMap;
2200  remoteslot_part = execute_attr_map_slot(attrmap, remoteslot,
2201  remoteslot_part);
2202  }
2203  else
2204  {
2205  remoteslot_part = ExecCopySlot(remoteslot_part, remoteslot);
2206  slot_getallattrs(remoteslot_part);
2207  }
2208  MemoryContextSwitchTo(oldctx);
2209 
2210  /* Check if we can do the update or delete on the leaf partition. */
2211  if (operation == CMD_UPDATE || operation == CMD_DELETE)
2212  {
2213  part_entry = logicalrep_partition_open(relmapentry, partrel,
2214  attrmap);
2215  check_relation_updatable(part_entry);
2216  }
2217 
2218  switch (operation)
2219  {
2220  case CMD_INSERT:
2221  apply_handle_insert_internal(edata, partrelinfo,
2222  remoteslot_part);
2223  break;
2224 
2225  case CMD_DELETE:
2226  apply_handle_delete_internal(edata, partrelinfo,
2227  remoteslot_part);
2228  break;
2229 
2230  case CMD_UPDATE:
2231 
2232  /*
2233  * For UPDATE, depending on whether or not the updated tuple
2234  * satisfies the partition's constraint, perform a simple UPDATE
2235  * of the partition or move the updated tuple into a different
2236  * suitable partition.
2237  */
2238  {
2239  TupleTableSlot *localslot;
2240  ResultRelInfo *partrelinfo_new;
2241  Relation partrel_new;
2242  bool found;
2243 
2244  /* Get the matching local tuple from the partition. */
2245  found = FindReplTupleInLocalRel(estate, partrel,
2246  &part_entry->remoterel,
2247  remoteslot_part, &localslot);
2248  if (!found)
2249  {
2250  /*
2251  * The tuple to be updated could not be found. Do nothing
2252  * except for emitting a log message.
2253  *
2254  * XXX should this be promoted to ereport(LOG) perhaps?
2255  */
2256  elog(DEBUG1,
2257  "logical replication did not find row to be updated "
2258  "in replication target relation's partition \"%s\"",
2259  RelationGetRelationName(partrel));
2260  return;
2261  }
2262 
2263  /*
2264  * Apply the update to the local tuple, putting the result in
2265  * remoteslot_part.
2266  */
2268  slot_modify_data(remoteslot_part, localslot, part_entry,
2269  newtup);
2270  MemoryContextSwitchTo(oldctx);
2271 
2272  /*
2273  * Does the updated tuple still satisfy the current
2274  * partition's constraint?
2275  */
2276  if (!partrel->rd_rel->relispartition ||
2277  ExecPartitionCheck(partrelinfo, remoteslot_part, estate,
2278  false))
2279  {
2280  /*
2281  * Yes, so simply UPDATE the partition. We don't call
2282  * apply_handle_update_internal() here, which would
2283  * normally do the following work, to avoid repeating some
2284  * work already done above to find the local tuple in the
2285  * partition.
2286  */
2287  EPQState epqstate;
2288 
2289  EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
2290  ExecOpenIndices(partrelinfo, false);
2291 
2292  EvalPlanQualSetSlot(&epqstate, remoteslot_part);
2294  ACL_UPDATE);
2295  ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
2296  localslot, remoteslot_part);
2297  ExecCloseIndices(partrelinfo);
2298  EvalPlanQualEnd(&epqstate);
2299  }
2300  else
2301  {
2302  /* Move the tuple into the new partition. */
2303 
2304  /*
2305  * New partition will be found using tuple routing, which
2306  * can only occur via the parent table. We might need to
2307  * convert the tuple to the parent's rowtype. Note that
2308  * this is the tuple found in the partition, not the
2309  * original search tuple received by this function.
2310  */
2311  if (map)
2312  {
2313  TupleConversionMap *PartitionToRootMap =
2315  RelationGetDescr(parentrel));
2316 
2317  remoteslot =
2318  execute_attr_map_slot(PartitionToRootMap->attrMap,
2319  remoteslot_part, remoteslot);
2320  }
2321  else
2322  {
2323  remoteslot = ExecCopySlot(remoteslot, remoteslot_part);
2324  slot_getallattrs(remoteslot);
2325  }
2326 
2327  /* Find the new partition. */
2329  partrelinfo_new = ExecFindPartition(mtstate, relinfo,
2330  proute, remoteslot,
2331  estate);
2332  MemoryContextSwitchTo(oldctx);
2333  Assert(partrelinfo_new != partrelinfo);
2334  partrel_new = partrelinfo_new->ri_RelationDesc;
2335 
2336  /* Check that new partition also has supported relkind. */
2337  CheckSubscriptionRelkind(partrel_new->rd_rel->relkind,
2339  RelationGetRelationName(partrel_new));
2340 
2341  /* DELETE old tuple found in the old partition. */
2342  apply_handle_delete_internal(edata, partrelinfo,
2343  localslot);
2344 
2345  /* INSERT new tuple into the new partition. */
2346 
2347  /*
2348  * Convert the replacement tuple to match the destination
2349  * partition rowtype.
2350  */
2352  remoteslot_part = partrelinfo_new->ri_PartitionTupleSlot;
2353  if (remoteslot_part == NULL)
2354  remoteslot_part = table_slot_create(partrel_new,
2355  &estate->es_tupleTable);
2356  map = ExecGetRootToChildMap(partrelinfo_new, estate);
2357  if (map != NULL)
2358  {
2359  remoteslot_part = execute_attr_map_slot(map->attrMap,
2360  remoteslot,
2361  remoteslot_part);
2362  }
2363  else
2364  {
2365  remoteslot_part = ExecCopySlot(remoteslot_part,
2366  remoteslot);
2367  slot_getallattrs(remoteslot);
2368  }
2369  MemoryContextSwitchTo(oldctx);
2370  apply_handle_insert_internal(edata, partrelinfo_new,
2371  remoteslot_part);
2372  }
2373  }
2374  break;
2375 
2376  default:
2377  elog(ERROR, "unrecognized CmdType: %d", (int) operation);
2378  break;
2379  }
2380 }
2381 
2382 /*
2383  * Handle TRUNCATE message.
2384  *
 * The remote relation ids are read with logicalrep_read_truncate() and each
 * one is opened under AccessExclusiveLock. Relations rejected by
 * should_apply_changes_for_rel() are closed again and skipped. For a
 * partitioned target the children returned by find_all_inheritors() are
 * added as well, and the whole set is truncated by a single
 * ExecuteTruncateGuts() call with DROP_RESTRICT.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous (e.g. 2387 -> 2389), so some source lines, including the line
 * carrying the function name, appear to be missing from this copy.
 *
2385  * TODO: FDW support
2386  */
2387 static void
2389 {
2390  bool cascade = false;
2391  bool restart_seqs = false;
2392  List *remote_relids = NIL;
2393  List *remote_rels = NIL;
2394  List *rels = NIL;
2395  List *part_rels = NIL;
2396  List *relids = NIL;
2397  List *relids_logged = NIL;
2398  ListCell *lc;
2399  LOCKMODE lockmode = AccessExclusiveLock;
2400 
2401  /*
2402  * Quick return if we are skipping data modification changes or handling
2403  * streamed transactions.
2404  */
2405  if (is_skipping_changes() ||
2407  return;
2408 
2410 
2411  remote_relids = logicalrep_read_truncate(s, &cascade, &restart_seqs);
2412 
2413  foreach(lc, remote_relids)
2414  {
2415  LogicalRepRelId relid = lfirst_oid(lc);
2416  LogicalRepRelMapEntry *rel;
2417 
2418  rel = logicalrep_rel_open(relid, lockmode);
2419  if (!should_apply_changes_for_rel(rel))
2420  {
2421  /*
2422  * The relation can't become interesting in the middle of the
2423  * transaction so it's safe to unlock it.
2424  */
2425  logicalrep_rel_close(rel, lockmode);
2426  continue;
2427  }
2428 
2429  remote_rels = lappend(remote_rels, rel);
2431  rels = lappend(rels, rel->localrel);
2432  relids = lappend_oid(relids, rel->localreloid);
2434  relids_logged = lappend_oid(relids_logged, rel->localreloid);
2435 
2436  /*
2437  * Truncate partitions if we got a message to truncate a partitioned
2438  * table.
2439  */
2440  if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2441  {
2442  ListCell *child;
2443  List *children = find_all_inheritors(rel->localreloid,
2444  lockmode,
2445  NULL);
2446 
2447  foreach(child, children)
2448  {
2449  Oid childrelid = lfirst_oid(child);
2450  Relation childrel;
2451 
 /* Skip children that are already in the list of relations to truncate. */
2452  if (list_member_oid(relids, childrelid))
2453  continue;
2454 
2455  /* find_all_inheritors already got lock */
2456  childrel = table_open(childrelid, NoLock);
2457 
2458  /*
2459  * Ignore temp tables of other backends. See similar code in
2460  * ExecuteTruncate().
2461  */
2462  if (RELATION_IS_OTHER_TEMP(childrel))
2463  {
2464  table_close(childrel, lockmode);
2465  continue;
2466  }
2467 
2469  rels = lappend(rels, childrel);
2470  part_rels = lappend(part_rels, childrel);
2471  relids = lappend_oid(relids, childrelid);
2472  /* Log this relation only if needed for logical decoding */
2473  if (RelationIsLogicallyLogged(childrel))
2474  relids_logged = lappend_oid(relids_logged, childrelid);
2475  }
2476  }
2477  }
2478 
2479  /*
2480  * Even if we used CASCADE on the upstream primary we explicitly default
2481  * to replaying changes without further cascading. This might be later
2482  * changeable with a user specified option.
2483  */
2484  ExecuteTruncateGuts(rels,
2485  relids,
2486  relids_logged,
2487  DROP_RESTRICT,
2488  restart_seqs);
2489  foreach(lc, remote_rels)
2490  {
2491  LogicalRepRelMapEntry *rel = lfirst(lc);
2492 
2494  }
 /* Close the partition relations opened above; NoLock is passed here. */
2495  foreach(lc, part_rels)
2496  {
2497  Relation rel = lfirst(lc);
2498 
2499  table_close(rel, NoLock);
2500  }
2501 
2503 }
2504 
2505 
2506 /*
2507  * Logical replication protocol message dispatcher.
 *
 * Routes one protocol message to the handler for its message type. The
 * command recorded in apply_error_callback_arg is saved on entry and
 * restored before returning. An unrecognized message type is reported as
 * ERROR with ERRCODE_PROTOCOL_VIOLATION.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line and most "case" labels and handler
 * calls appear to be missing from this copy, so the switch below is not
 * complete as shown.
2508  */
2509 static void
2511 {
2513  LogicalRepMsgType saved_command;
2514 
2515  /*
2516  * Set the current command being applied. Since this function can be
2517  * called recursively when applying spooled changes, save the current
2518  * command.
2519  */
2520  saved_command = apply_error_callback_arg.command;
2522 
2523  switch (action)
2524  {
2525  case LOGICAL_REP_MSG_BEGIN:
2526  apply_handle_begin(s);
2527  break;
2528 
2531  break;
2532 
2535  break;
2536 
2539  break;
2540 
2543  break;
2544 
2547  break;
2548 
2551  break;
2552 
2553  case LOGICAL_REP_MSG_TYPE:
2554  apply_handle_type(s);
2555  break;
2556 
2559  break;
2560 
2562 
2563  /*
2564  * Logical replication does not use generic logical messages yet.
2565  * Although, it could be used by other applications that use this
2566  * output plugin.
2567  */
2568  break;
2569 
2572  break;
2573 
2576  break;
2577 
2580  break;
2581 
2584  break;
2585 
2588  break;
2589 
2592  break;
2593 
2596  break;
2597 
2600  break;
2601 
2604  break;
2605 
2606  default:
2607  ereport(ERROR,
2608  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2609  errmsg("invalid logical replication message type \"%c\"", action)));
2610  }
2611 
2612  /* Reset the current command */
2613  apply_error_callback_arg.command = saved_command;
2614 }
2615 
2616 /*
2617  * Figure out which write/flush positions to report to the walsender process.
2618  *
2619  * We can't simply report back the last LSN the walsender sent us because the
2620  * local transaction might not yet be flushed to disk locally. Instead we
2621  * build a list that associates local with remote LSNs for every commit. When
2622  * reporting back the flush position to the sender we iterate that list and
2623  * check which entries on it are already locally flushed. Those we can report
2624  * as having been flushed.
2625  *
2626  * *have_pending_txes is set to true if there are outstanding transactions
2627  * that need to be flushed.
 *
 * *flush starts out as InvalidXLogRecPtr and is advanced to the remote_end
 * of each entry whose local_end is at or before the local flush pointer;
 * such entries are removed from lsn_mapping and freed. *write is set to the
 * remote_end of the entries visited, or of the list tail when the scan
 * stops early at the first entry that is not yet flushed.
 *
 * NOTE(review): the embedded listing numbers are not contiguous here; the
 * function name line and the loop header appear to be missing from this
 * copy.
2628  */
2629 static void
2631  bool *have_pending_txes)
2632 {
2633  dlist_mutable_iter iter;
2634  XLogRecPtr local_flush = GetFlushRecPtr(NULL);
2635 
2637  *flush = InvalidXLogRecPtr;
2638 
2640  {
2641  FlushPosition *pos =
2642  dlist_container(FlushPosition, node, iter.cur);
2643 
2644  *write = pos->remote_end;
2645 
2646  if (pos->local_end <= local_flush)
2647  {
 /* Already flushed locally: report it and drop the entry. */
2648  *flush = pos->remote_end;
2649  dlist_delete(iter.cur);
2650  pfree(pos);
2651  }
2652  else
2653  {
2654  /*
2655  * Don't want to uselessly iterate over the rest of the list which
2656  * could potentially be long. Instead get the last element and
2657  * grab the write position from there.
2658  */
2659  pos = dlist_tail_element(FlushPosition, node,
2660  &lsn_mapping);
2661  *write = pos->remote_end;
2662  *have_pending_txes = true;
2663  return;
2664  }
2665  }
2666 
 /* Every visited entry was flushed; pending only if the list is non-empty. */
2667  *have_pending_txes = !dlist_is_empty(&lsn_mapping);
2668 }
2669 
2670 /*
2671  * Store current remote/local lsn pair in the tracking list.
 *
 * A new FlushPosition is allocated, filled with XactLastCommitEnd as the
 * local end and remote_lsn as the remote end, and appended to the tail of
 * lsn_mapping.
 *
 * NOTE(review): the embedded listing numbers are not contiguous here; the
 * function name line and the memory context switches that the "permanent
 * context" comment refers to appear to be missing from this copy.
2672  */
2673 static void
2675 {
2676  FlushPosition *flushpos;
2677 
2678  /* Need to do this in permanent context */
2680 
2681  /* Track commit lsn */
2682  flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
2683  flushpos->local_end = XactLastCommitEnd;
2684  flushpos->remote_end = remote_lsn;
2685 
2686  dlist_push_tail(&lsn_mapping, &flushpos->node);
2688 }
2689 
2690 
2691 /*
 * Update statistics of the worker.
 *
 * Records last_lsn and send_time in MyLogicalRepWorker. When 'reply' is
 * true the same values are also stored as reply_lsn and reply_time.
 *
 * NOTE(review): listing line 2697 is absent from this copy, so one
 * statement between the two groups of assignments may be missing.
 */
2692 static void
2693 UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
2694 {
2695  MyLogicalRepWorker->last_lsn = last_lsn;
2696  MyLogicalRepWorker->last_send_time = send_time;
2698  if (reply)
2699  {
2700  MyLogicalRepWorker->reply_lsn = last_lsn;
2701  MyLogicalRepWorker->reply_time = send_time;
2702  }
2703 }
2704 
2705 /*
2706  * Apply main loop.
 *
 * The outer loop runs once per wait. Whenever data is available the inner
 * loop consumes it message by message: 'w' messages advance last_received,
 * update the worker statistics and are handed to apply_dispatch(); 'k'
 * messages advance last_received and answer with send_feedback(); any other
 * message type is ignored. The loop ends once the publisher's data stream
 * has ended. On a wait timeout the worker checks wal_receiver_timeout,
 * possibly requests a reply from the server, and sends feedback.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line and a number of statements (context
 * creation, receive calls, the wait call, context resets) appear to be
 * missing from this copy.
2707  */
2708 static void
2710 {
2711  TimestampTz last_recv_timestamp = GetCurrentTimestamp();
2712  bool ping_sent = false;
2713  TimeLineID tli;
2714  ErrorContextCallback errcallback;
2715 
2716  /*
2717  * Init the ApplyMessageContext which we clean up after each replication
2718  * protocol message.
2719  */
2721  "ApplyMessageContext",
2723 
2724  /*
2725  * This memory context is used for per-stream data when the streaming mode
2726  * is enabled. This context is reset on each stream stop.
2727  */
2729  "LogicalStreamingContext",
2731 
2732  /* mark as idle, before starting to loop */
2734 
2735  /*
2736  * Push apply error context callback. Fields will be filled while applying
2737  * a change.
2738  */
2739  errcallback.callback = apply_error_callback;
2740  errcallback.previous = error_context_stack;
2741  error_context_stack = &errcallback;
2742 
2743  /* This outer loop iterates once per wait. */
2744  for (;;)
2745  {
2747  int rc;
2748  int len;
2749  char *buf = NULL;
2750  bool endofstream = false;
2751  long wait_time;
2752 
2754 
2756 
2758 
2759  if (len != 0)
2760  {
2761  /* Loop to process all available data (without blocking). */
2762  for (;;)
2763  {
2765 
2766  if (len == 0)
2767  {
2768  break;
2769  }
2770  else if (len < 0)
2771  {
2772  ereport(LOG,
2773  (errmsg("data stream from publisher has ended")));
2774  endofstream = true;
2775  break;
2776  }
2777  else
2778  {
2779  int c;
2780  StringInfoData s;
2781 
2782  /* Reset timeout. */
2783  last_recv_timestamp = GetCurrentTimestamp();
2784  ping_sent = false;
2785 
2786  /* Ensure we are reading the data into our memory context. */
2788 
 /* Wrap the received buffer in a StringInfo without copying it. */
2789  s.data = buf;
2790  s.len = len;
2791  s.cursor = 0;
2792  s.maxlen = -1;
2793 
2794  c = pq_getmsgbyte(&s);
2795 
2796  if (c == 'w')
2797  {
2798  XLogRecPtr start_lsn;
2799  XLogRecPtr end_lsn;
2800  TimestampTz send_time;
2801 
2802  start_lsn = pq_getmsgint64(&s);
2803  end_lsn = pq_getmsgint64(&s);
2804  send_time = pq_getmsgint64(&s);
2805 
 /* last_received only ever moves forward. */
2806  if (last_received < start_lsn)
2807  last_received = start_lsn;
2808 
2809  if (last_received < end_lsn)
2810  last_received = end_lsn;
2811 
2812  UpdateWorkerStats(last_received, send_time, false);
2813 
2814  apply_dispatch(&s);
2815  }
2816  else if (c == 'k')
2817  {
2818  XLogRecPtr end_lsn;
2820  bool reply_requested;
2821 
2822  end_lsn = pq_getmsgint64(&s);
2823  timestamp = pq_getmsgint64(&s);
2824  reply_requested = pq_getmsgbyte(&s);
2825 
2826  if (last_received < end_lsn)
2827  last_received = end_lsn;
2828 
2829  send_feedback(last_received, reply_requested, false);
2830  UpdateWorkerStats(last_received, timestamp, true);
2831  }
2832  /* other message types are purposefully ignored */
2833 
2835  }
2836 
2838  }
2839  }
2840 
2841  /* confirm all writes so far */
2842  send_feedback(last_received, false, false);
2843 
2845  {
2846  /*
2847  * If we didn't get any transactions for a while there might be
2848  * unconsumed invalidation messages in the queue, consume them
2849  * now.
2850  */
2853 
2854  /* Process any table synchronization changes. */
2855  process_syncing_tables(last_received);
2856  }
2857 
2858  /* Cleanup the memory. */
2861 
2862  /* Check if we need to exit the streaming loop. */
2863  if (endofstream)
2864  break;
2865 
2866  /*
2867  * Wait for more data or latch. If we have unflushed transactions,
2868  * wake up after WalWriterDelay to see if they've been flushed yet (in
2869  * which case we should send a feedback message). Otherwise, there's
2870  * no particular urgency about waking up unless we get data or a
2871  * signal.
2872  */
2873  if (!dlist_is_empty(&lsn_mapping))
2874  wait_time = WalWriterDelay;
2875  else
2876  wait_time = NAPTIME_PER_CYCLE;
2877 
2881  fd, wait_time,
2883 
2884  if (rc & WL_LATCH_SET)
2885  {
2888  }
2889 
2890  if (ConfigReloadPending)
2891  {
2892  ConfigReloadPending = false;
2894  }
2895 
2896  if (rc & WL_TIMEOUT)
2897  {
2898  /*
2899  * We didn't receive anything new. If we haven't heard anything
2900  * from the server for more than wal_receiver_timeout / 2, ping
2901  * the server. Also, if it's been longer than
2902  * wal_receiver_status_interval since the last update we sent,
2903  * send a status update to the primary anyway, to report any
2904  * progress in applying WAL.
2905  */
2906  bool requestReply = false;
2907 
2908  /*
2909  * Check if time since last receive from primary has reached the
2910  * configured limit.
2911  */
2912  if (wal_receiver_timeout > 0)
2913  {
2915  TimestampTz timeout;
2916 
2917  timeout =
2918  TimestampTzPlusMilliseconds(last_recv_timestamp,
2920 
2921  if (now >= timeout)
2922  ereport(ERROR,
2923  (errcode(ERRCODE_CONNECTION_FAILURE),
2924  errmsg("terminating logical replication worker due to timeout")));
2925 
2926  /* Check to see if it's time for a ping. */
2927  if (!ping_sent)
2928  {
2929  timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
2930  (wal_receiver_timeout / 2));
2931  if (now >= timeout)
2932  {
2933  requestReply = true;
2934  ping_sent = true;
2935  }
2936  }
2937  }
2938 
 /* A requested reply also forces the feedback message to be sent. */
2939  send_feedback(last_received, requestReply, requestReply);
2940 
2941  /*
2942  * Force reporting to ensure long idle periods don't lead to
2943  * arbitrarily delayed stats. Stats can only be reported outside
2944  * of (implicit or explicit) transactions. That shouldn't lead to
2945  * stats being delayed for long, because transactions are either
2946  * sent as a whole on commit or streamed. Streamed transactions
2947  * are spilled to disk and applied on commit.
2948  */
2949  if (!IsTransactionState())
2950  pgstat_report_stat(true);
2951  }
2952  }
2953 
2954  /* Pop the error context stack */
2955  error_context_stack = errcallback.previous;
2956 
2957  /* All done */
2959 }
2960 
2961 /*
2962  * Send a Standby Status Update message to server.
2963  *
2964  * 'recvpos' is the latest LSN we've received data to, force is set if we need
2965  * to send a response to avoid timeouts.
 *
 * 'requestReply' is written into the message as its final byte.
 *
 * Nothing is sent when 'force' is false and wal_receiver_status_interval is
 * not positive. The static last_recvpos/last_writepos/last_flushpos values
 * remember the highest positions seen so far, so the positions used here
 * never move backwards between calls.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the statement that sets 'now', part of the "already reported"
 * test, the reply_message setup/reset and the actual send call appear to be
 * missing from this copy.
2966  */
2967 static void
2968 send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
2969 {
2970  static StringInfo reply_message = NULL;
2971  static TimestampTz send_time = 0;
2972 
2973  static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
2974  static XLogRecPtr last_writepos = InvalidXLogRecPtr;
2975  static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
2976 
2977  XLogRecPtr writepos;
2978  XLogRecPtr flushpos;
2979  TimestampTz now;
2980  bool have_pending_txes;
2981 
2982  /*
2983  * If the user doesn't want status to be reported to the publisher, be
2984  * sure to exit before doing anything at all.
2985  */
2986  if (!force && wal_receiver_status_interval <= 0)
2987  return;
2988 
2989  /* It's legal to not pass a recvpos */
2990  if (recvpos < last_recvpos)
2991  recvpos = last_recvpos;
2992 
2993  get_flush_position(&writepos, &flushpos, &have_pending_txes);
2994 
2995  /*
2996  * No outstanding transactions to flush, we can report the latest received
2997  * position. This is important for synchronous replication.
2998  */
2999  if (!have_pending_txes)
3000  flushpos = writepos = recvpos;
3001 
 /* Never report a position older than one reported before. */
3002  if (writepos < last_writepos)
3003  writepos = last_writepos;
3004 
3005  if (flushpos < last_flushpos)
3006  flushpos = last_flushpos;
3007 
3009 
3010  /* if we've already reported everything we're good */
3011  if (!force &&
3012  writepos == last_writepos &&
3013  flushpos == last_flushpos &&
3014  !TimestampDifferenceExceeds(send_time, now,
3016  return;
3017  send_time = now;
3018 
3019  if (!reply_message)
3020  {
3022 
3024  MemoryContextSwitchTo(oldctx);
3025  }
3026  else
3028 
3029  pq_sendbyte(reply_message, 'r');
3030  pq_sendint64(reply_message, recvpos); /* write */
3031  pq_sendint64(reply_message, flushpos); /* flush */
3032  pq_sendint64(reply_message, writepos); /* apply */
3033  pq_sendint64(reply_message, now); /* sendTime */
3034  pq_sendbyte(reply_message, requestReply); /* replyRequested */
3035 
3036  elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
3037  force,
3038  LSN_FORMAT_ARGS(recvpos),
3039  LSN_FORMAT_ARGS(writepos),
3040  LSN_FORMAT_ARGS(flushpos));
3041 
3044 
 /* Remember the highest positions reported so far. */
3045  if (recvpos > last_recvpos)
3046  last_recvpos = recvpos;
3047  if (writepos > last_writepos)
3048  last_writepos = writepos;
3049  if (flushpos > last_flushpos)
3050  last_flushpos = flushpos;
3051 }
3052 
3053 /*
3054  * Reread subscription info if needed. Most changes will cause the worker
 * to exit.
 *
 * Does nothing while MySubscriptionValid is true. Otherwise the new
 * subscription data ('newsub') is compared with MySubscription:
 *   - subscription removed or disabled: log and proc_exit(0);
 *   - conninfo, name, slotname, binary, stream, origin, owner or the
 *     publication list changed: log and proc_exit(0);
 *   - dbid changed: ERROR.
 * If none of these applies, synchronous_commit is set from the subscription
 * and MySubscriptionValid becomes true again.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, the declaration and assignment of
 * 'newsub', the transaction start/commit calls and the statements that
 * replace MySubscription appear to be missing from this copy.
3055  */
3056 static void
3058 {
3059  MemoryContext oldctx;
3061  bool started_tx = false;
3062 
3063  /* When cache state is valid there is nothing to do here. */
3064  if (MySubscriptionValid)
3065  return;
3066 
3067  /* This function might be called inside or outside of transaction. */
3068  if (!IsTransactionState())
3069  {
3071  started_tx = true;
3072  }
3073 
3074  /* Ensure allocations in permanent context. */
3076 
3078 
3079  /*
3080  * Exit if the subscription was removed. This normally should not happen
3081  * as the worker gets killed during DROP SUBSCRIPTION.
3082  */
3083  if (!newsub)
3084  {
3085  ereport(LOG,
3086  (errmsg("logical replication apply worker for subscription \"%s\" will "
3087  "stop because the subscription was removed",
3088  MySubscription->name)));
3089 
3090  proc_exit(0);
3091  }
3092 
3093  /* Exit if the subscription was disabled. */
3094  if (!newsub->enabled)
3095  {
3096  ereport(LOG,
3097  (errmsg("logical replication apply worker for subscription \"%s\" will "
3098  "stop because the subscription was disabled",
3099  MySubscription->name)));
3100 
3101  proc_exit(0);
3102  }
3103 
3104  /* !slotname should never happen when enabled is true. */
3105  Assert(newsub->slotname);
3106 
3107  /* two-phase should not be altered */
3108  Assert(newsub->twophasestate == MySubscription->twophasestate);
3109 
3110  /*
3111  * Exit if any parameter that affects the remote connection was changed.
3112  * The launcher will start a new worker.
3113  */
3114  if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0 ||
3115  strcmp(newsub->name, MySubscription->name) != 0 ||
3116  strcmp(newsub->slotname, MySubscription->slotname) != 0 ||
3117  newsub->binary != MySubscription->binary ||
3118  newsub->stream != MySubscription->stream ||
3119  strcmp(newsub->origin, MySubscription->origin) != 0 ||
3120  newsub->owner != MySubscription->owner ||
3121  !equal(newsub->publications, MySubscription->publications))
3122  {
3123  ereport(LOG,
3124  (errmsg("logical replication apply worker for subscription \"%s\" will restart because of a parameter change",
3125  MySubscription->name)));
3126 
3127  proc_exit(0);
3128  }
3129 
3130  /* Check for other changes that should never happen too. */
3131  if (newsub->dbid != MySubscription->dbid)
3132  {
3133  elog(ERROR, "subscription %u changed unexpectedly",
3135  }
3136 
3137  /* Clean old subscription info and switch to new one. */
3140 
3141  MemoryContextSwitchTo(oldctx);
3142 
3143  /* Change synchronous commit according to the user's wishes */
3144  SetConfigOption("synchronous_commit", MySubscription->synccommit,
3146 
3147  if (started_tx)
3149 
3150  MySubscriptionValid = true;
3151 }
3152 
3153 /*
3154  * Callback from subscription syscache invalidation.
3155  */
3156 static void
3157 subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
3158 {
3159  MySubscriptionValid = false;
3160 }
3161 
3162 /*
3163  * subxact_info_write
3164  * Store information about subxacts for a toplevel transaction.
3165  *
3166  * For each subxact we store the offset of its first change in the main file.
3167  * The file is always over-written as a whole.
3168  *
 * When subxact_data.nsubxacts is zero no file is written and the function
 * returns early (see the "Delete the subxacts file" step below).
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, the file delete/open/create calls,
 * the write calls and the final cleanup call appear to be missing from this
 * copy.
 *
3169  * XXX We should only store subxacts that were not aborted yet.
3170  */
3171 static void
3173 {
3174  char path[MAXPGPATH];
3175  Size len;
3176  BufFile *fd;
3177 
3179 
3180  /* construct the subxact filename */
3181  subxact_filename(path, subid, xid);
3182 
3183  /* Delete the subxacts file, if exists. */
3184  if (subxact_data.nsubxacts == 0)
3185  {
3188 
3189  return;
3190  }
3191 
3192  /*
3193  * Create the subxact file if it is not already created, otherwise open the
3194  * existing file.
3195  */
3197  true);
3198  if (fd == NULL)
3200 
 /* Size in bytes of the SubXactInfo entries currently in use. */
3201  len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
3202 
3203  /* Write the subxact count and subxact info */
3206 
3207  BufFileClose(fd);
3208 
3209  /* free the memory allocated for subxact info */
3211 }
3212 
3213 /*
3214  * subxact_info_read
3215  * Restore information about subxacts of a streamed transaction.
3216  *
3217  * Read information about subxacts into the structure subxact_data that can be
3218  * used later.
 *
 * If the subxact file cannot be opened the function returns without reading
 * anything. A short read of either the count or the entries is reported as
 * ERROR.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, the file open call, the computation
 * of the array capacity and the allocation itself appear to be missing from
 * this copy.
3219  */
3220 static void
3222 {
3223  char path[MAXPGPATH];
3224  Size len;
3225  BufFile *fd;
3226  MemoryContext oldctx;
3227 
3231 
3232  /*
3233  * If the subxact file doesn't exist that means we don't have any subxact
3234  * info.
3235  */
3236  subxact_filename(path, subid, xid);
3238  true);
3239  if (fd == NULL)
3240  return;
3241 
3242  /* read number of subxact items */
3244  sizeof(subxact_data.nsubxacts)) !=
3245  sizeof(subxact_data.nsubxacts))
3246  ereport(ERROR,
3248  errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
3249  path)));
3250 
 /* Size in bytes of the entries that follow the count in the file. */
3251  len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
3252 
3253  /* we keep the maximum as a power of 2 */
3255 
3256  /*
3257  * Allocate subxact information in the logical streaming context. We need
3258  * this information during the complete stream so that we can add the sub
3259  * transaction info to this. On stream stop we will flush this information
3260  * to the subxact file and reset the logical streaming context.
3261  */
3264  sizeof(SubXactInfo));
3265  MemoryContextSwitchTo(oldctx);
3266 
 /* Nothing more to read when the file recorded zero subxacts. */
3267  if ((len > 0) && ((BufFileRead(fd, subxact_data.subxacts, len)) != len))
3268  ereport(ERROR,
3270  errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
3271  path)));
3272 
3273  BufFileClose(fd);
3274 }
3275 
3276 /*
3277  * subxact_info_add
3278  * Add information about a subxact (offset in the main file).
 *
 * Nothing is added when xid equals stream_xid, when xid equals
 * subxact_data.subxact_last, or when xid is already present in the subxacts
 * array. Otherwise a new entry is stored at index subxact_data.nsubxacts
 * with its xid, fileno and offset.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, the array capacity handling, the
 * call that fetches the file position and the increment of nsubxacts
 * appear to be missing from this copy.
3279  */
3280 static void
3282 {
3283  SubXactInfo *subxacts = subxact_data.subxacts;
3284  int64 i;
3285 
3286  /* We must have a valid top level stream xid and a stream fd. */
3288  Assert(stream_fd != NULL);
3289 
3290  /*
3291  * If the XID matches the toplevel transaction, we don't want to add it.
3292  */
3293  if (stream_xid == xid)
3294  return;
3295 
3296  /*
3297  * In most cases we're checking the same subxact as we've already seen in
3298  * the last call, so make sure to ignore it (this change comes later).
3299  */
3300  if (subxact_data.subxact_last == xid)
3301  return;
3302 
3303  /* OK, remember we're processing this XID. */
3304  subxact_data.subxact_last = xid;
3305 
3306  /*
3307  * Check if the transaction is already present in the array of subxact. We
3308  * intentionally scan the array from the tail, because we're likely adding
3309  * a change for the most recent subtransactions.
3310  *
3311  * XXX Can we rely on the subxact XIDs arriving in sorted order? That
3312  * would allow us to use binary search here.
3313  */
3314  for (i = subxact_data.nsubxacts; i > 0; i--)
3315  {
3316  /* found, so we're done */
3317  if (subxacts[i - 1].xid == xid)
3318  return;
3319  }
3320 
3321  /* This is a new subxact, so we need to add it to the array. */
3322  if (subxact_data.nsubxacts == 0)
3323  {
3324  MemoryContext oldctx;
3325 
3327 
3328  /*
3329  * Allocate this memory for subxacts in per-stream context, see
3330  * subxact_info_read.
3331  */
3333  subxacts = palloc(subxact_data.nsubxacts_max * sizeof(SubXactInfo));
3334  MemoryContextSwitchTo(oldctx);
3335  }
3337  {
3339  subxacts = repalloc(subxacts,
3341  }
3342 
3343  subxacts[subxact_data.nsubxacts].xid = xid;
3344 
3345  /*
3346  * Get the current offset of the stream file and store it as offset of
3347  * this subxact.
3348  */
3350  &subxacts[subxact_data.nsubxacts].fileno,
3351  &subxacts[subxact_data.nsubxacts].offset);
3352 
 /* Publish the (possibly reallocated) array back into subxact_data. */
3354  subxact_data.subxacts = subxacts;
3355 }
3356 
3357 /* format filename for file containing the info about subxacts */
3358 static inline void
3359 subxact_filename(char *path, Oid subid, TransactionId xid)
3360 {
3361  snprintf(path, MAXPGPATH, "%u-%u.subxacts", subid, xid);
3362 }
3363 
3364 /* format filename for file containing serialized changes */
3365 static inline void
3366 changes_filename(char *path, Oid subid, TransactionId xid)
3367 {
3368  snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
3369 }
3370 
3371 /*
3372  * stream_cleanup_files
3373  * Cleanup files for a subscription / toplevel transaction.
3374  *
3375  * Remove files with serialized changes and subxact info for a particular
3376  * toplevel transaction. Each subscription has a separate set of files
3377  * for any toplevel transaction.
 *
 * The two file names are built with changes_filename() and
 * subxact_filename() from the given subid and xid.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line and both delete calls appear to be
 * missing from this copy.
3378  */
3379 static void
3381 {
3382  char path[MAXPGPATH];
3383 
3384  /* Delete the changes file. */
3385  changes_filename(path, subid, xid);
3387 
3388  /* Delete the subxact file, if it exists. */
3389  subxact_filename(path, subid, xid);
3391 }
3392 
3393 /*
3394  * stream_open_file
3395  * Open a file that we'll use to serialize changes for a toplevel
3396  * transaction.
3397  *
3398  * Open a file for streamed changes from a toplevel transaction identified
3399  * by stream_xid (global variable). If it's the first chunk of streamed
3400  * changes for this transaction, create the buffile, otherwise open the
3401  * previously created file.
3402  *
3403  * This can only be called at the beginning of a "streaming" block, i.e.
3404  * between stream_start/stream_stop messages from the upstream.
 *
 * 'first_segment' selects between creating the file and opening the
 * existing one; in the latter case the file is positioned at its end so
 * that new changes are appended. stream_fd must be NULL on entry.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; some assertions, the memory context switch and the calls
 * that assign stream_fd appear to be missing from this copy.
3405  */
3406 static void
3407 stream_open_file(Oid subid, TransactionId xid, bool first_segment)
3408 {
3409  char path[MAXPGPATH];
3410  MemoryContext oldcxt;
3411 
3413  Assert(OidIsValid(subid));
3415  Assert(stream_fd == NULL);
3416 
3417 
3418  changes_filename(path, subid, xid);
3419  elog(DEBUG1, "opening file \"%s\" for streamed changes", path);
3420 
3421  /*
3422  * Create/open the buffiles under the logical streaming context so that we
3423  * have those files until stream stop.
3424  */
3426 
3427  /*
3428  * If this is the first streamed segment, create the changes file.
3429  * Otherwise, just open the file for writing, in append mode.
3430  */
3431  if (first_segment)
3433  path);
3434  else
3435  {
3436  /*
3437  * Open the file and seek to the end of the file because we always
3438  * append the changes file.
3439  */
3441  path, O_RDWR, false);
3442  BufFileSeek(stream_fd, 0, 0, SEEK_END);
3443  }
3444 
3445  MemoryContextSwitchTo(oldcxt);
3446 }
3447 
3448 /*
3449  * stream_close_file
3450  * Close the currently open file with streamed changes.
3451  *
3452  * This can only be called at the end of a streaming block, i.e. at stream_stop
3453  * message from the upstream.
 *
 * stream_fd must be non-NULL on entry and is reset to NULL before
 * returning.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, further assertions and the close
 * call itself appear to be missing from this copy.
3454  */
3455 static void
3457 {
3460  Assert(stream_fd != NULL);
3461 
3463 
3465  stream_fd = NULL;
3466 }
3467 
3468 /*
3469  * stream_write_change
3470  * Serialize a change to a file for the current toplevel transaction.
3471  *
3472  * The change is serialized in a simple format, with length (not including
3473  * the length), action code (identifying the message type) and message
3474  * contents (without the subxact TransactionId value).
 *
 * The record written to stream_fd is, in order: an int holding the size of
 * everything that follows, the action value, and the bytes of 's' from its
 * current cursor position up to its end.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line and some assertions appear to be
 * missing from this copy.
3475  */
3476 static void
3478 {
3479  int len;
3480 
3483  Assert(stream_fd != NULL);
3484 
3485  /* total on-disk size, including the action type character */
3486  len = (s->len - s->cursor) + sizeof(char);
3487 
3488  /* first write the size */
3489  BufFileWrite(stream_fd, &len, sizeof(len));
3490 
3491  /* then the action */
3492  BufFileWrite(stream_fd, &action, sizeof(action));
3493 
3494  /* and finally the remaining part of the buffer (after the XID) */
3495  len = (s->len - s->cursor);
3496 
3497  BufFileWrite(stream_fd, &s->data[s->cursor], len);
3498 }
3499 
3500 /*
3501  * Cleanup the memory for subxacts and reset the related variables.
 *
 * After the call subxact_data.subxacts is NULL and subxact_data.nsubxacts
 * is 0.
 *
 * NOTE(review): the embedded listing numbers in this function are not
 * contiguous; the function name line, the statement guarded by the "if"
 * below and the resets of the remaining subxact_data fields appear to be
 * missing from this copy.
3502  */
3503 static inline void
3505 {
3506  if (subxact_data.subxacts)
3508 
3509  subxact_data.subxacts = NULL;
3511  subxact_data.nsubxacts = 0;
3513 }
3514 
3515 /*
3516  * Form the prepared transaction GID for two_phase transactions.
3517  *
3518  * Return the GID in the supplied buffer.
3519  */
3520 static void
3521 TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid)
3522 {
3523  Assert(subid != InvalidRepOriginId);
3524 
3525  if (!TransactionIdIsValid(xid))
3526  ereport(ERROR,
3527  (errcode(ERRCODE_PROTOCOL_VIOLATION),
3528  errmsg_internal("invalid two-phase transaction ID")));
3529 
3530  snprintf(gid, szgid, "pg_gid_%u_%u", subid, xid);
3531 }
3532 
3533 /*
3534  * Execute the initial sync with error handling. Disable the subscription,
3535  * if it's required.
3536  *
3537  * Allocate the slot name in long-lived context on return. Note that we don't
3538  * handle FATAL errors, which are probably due to system resource errors and
3539  * are not repeatable.
3540  */
3541 static void
3542 start_table_sync(XLogRecPtr *origin_startpos, char **myslotname)
3543 {
3544  char *syncslotname = NULL;
3545 
3547 
3548  PG_TRY();
3549  {
3550  /* Call initial sync. */
3551  syncslotname = LogicalRepSyncTableStart(origin_startpos);
3552  }
3553  PG_CATCH();
3554  {
3557  else
3558  {
3559  /*
3560  * Report the worker failed during table synchronization. Abort
3561  * the current transaction so that the stats message is sent in an
3562  * idle state.
3563  */
3566 
3567  PG_RE_THROW();
3568  }
3569  }
3570  PG_END_TRY();
3571 
3572  /* allocate slot name in long-lived context */
3573  *myslotname = MemoryContextStrdup(ApplyContext, syncslotname);
3574  pfree(syncslotname);
3575 }
3576 
3577 /*
3578  * Run the apply loop with error handling. Disable the subscription,
3579  * if necessary.
3580  *
3581  * Note that we don't handle FATAL errors, which are probably due to
3582  * system resource errors and are not repeatable.
3583  */
3584 static void
3585 start_apply(XLogRecPtr origin_startpos)
3586 {
3587  PG_TRY();
3588  {
3589  LogicalRepApplyLoop(origin_startpos);
3590  }
3591  PG_CATCH();
3592  {
3595  else
3596  {
3597  /*
3598  * Report the worker failed while applying changes. Abort the
3599  * current transaction so that the stats message is sent in an
3600  * idle state.
3601  */
3604 
3605  PG_RE_THROW();
3606  }
3607  }
3608  PG_END_TRY();
3609 }
3610 
3611 /* Logical Replication Apply worker entry point */
3612 void
3614 {
3615  int worker_slot = DatumGetInt32(main_arg);
3616  MemoryContext oldctx;
3617  char originname[NAMEDATALEN];
3618  XLogRecPtr origin_startpos = InvalidXLogRecPtr;
3619  char *myslotname = NULL;
3621  int server_version;
3622 
3623  /* Attach to slot */
3624  logicalrep_worker_attach(worker_slot);
3625 
3626  /* Setup signal handling */
3628  pqsignal(SIGTERM, die);
3630 
3631  /*
3632  * We don't currently need any ResourceOwner in a walreceiver process, but
3633  * if we did, we could call CreateAuxProcessResourceOwner here.
3634  */
3635 
3636  /* Initialise stats to a sanish value */
3639 
3640  /* Load the libpq-specific functions */
3641  load_file("libpqwalreceiver", false);
3642 
3643  /* Run as replica session replication role. */
3644  SetConfigOption("session_replication_role", "replica",
3646 
3647  /* Connect to our database. */
3650  0);
3651 
3652  /*
3653  * Set always-secure search path, so malicious users can't redirect user
3654  * code (e.g. pg_index.indexprs).
3655  */
3656  SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
3657 
3658  /* Load the subscription into persistent memory context. */
3660  "ApplyContext",
3664 
3666  if (!MySubscription)
3667  {
3668  ereport(LOG,
3669  (errmsg("logical replication apply worker for subscription %u will not "
3670  "start because the subscription was removed during startup",
3672  proc_exit(0);
3673  }
3674 
3675  MySubscriptionValid = true;
3676  MemoryContextSwitchTo(oldctx);
3677 
3678  if (!MySubscription->enabled)
3679  {
3680  ereport(LOG,
3681  (errmsg("logical replication apply worker for subscription \"%s\" will not "
3682  "start because the subscription was disabled during startup",
3683  MySubscription->name)));
3684 
3685  proc_exit(0);
3686  }
3687 
3688  /* Setup synchronous commit according to the user's wishes */
3689  SetConfigOption("synchronous_commit", MySubscription->synccommit,
3691 
3692  /* Keep us informed about subscription changes. */
3695  (Datum) 0);
3696 
3697  if (am_tablesync_worker())
3698  ereport(LOG,
3699  (errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has started",
3701  else
3702  ereport(LOG,
3703  (errmsg("logical replication apply worker for subscription \"%s\" has started",
3704  MySubscription->name)));
3705 
3707 
3708  /* Connect to the origin and start the replication. */
3709  elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
3711 
3712  if (am_tablesync_worker())
3713  {
3714  start_table_sync(&origin_startpos, &myslotname);
3715 
3716  /*
3717  * Allocate the origin name in long-lived context for error context
3718  * message.
3719  */
3722  originname,
3723  sizeof(originname));
3725  originname);
3726  }
3727  else
3728  {
3729  /* This is main apply worker */
3730  RepOriginId originid;
3731  TimeLineID startpointTLI;
3732  char *err;
3733 
3734  myslotname = MySubscription->slotname;
3735 
3736  /*
3737  * This shouldn't happen if the subscription is enabled, but guard
3738  * against DDL bugs or manual catalog changes. (libpqwalreceiver will
3739  * crash if slot is NULL.)
3740  */
3741  if (!myslotname)
3742  ereport(ERROR,
3743  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3744  errmsg("subscription has no replication slot set")));
3745 
3746  /* Setup replication origin tracking. */
3749  originname, sizeof(originname));
3750  originid = replorigin_by_name(originname, true);
3751  if (!OidIsValid(originid))
3752  originid = replorigin_create(originname);
3753  replorigin_session_setup(originid);
3754  replorigin_session_origin = originid;
3755  origin_startpos = replorigin_session_get_progress(false);
3757 
3759  MySubscription->name, &err);
3760  if (LogRepWorkerWalRcvConn == NULL)
3761  ereport(ERROR,
3762  (errcode(ERRCODE_CONNECTION_FAILURE),
3763  errmsg("could not connect to the publisher: %s", err)));
3764 
3765  /*
3766  * We don't really use the output of identify_system for anything, but it
3767  * does some initializations on the upstream so let's still call it.
3768  */
3769  (void) walrcv_identify_system(LogRepWorkerWalRcvConn, &startpointTLI);
3770 
3771  /*
3772  * Allocate the origin name in long-lived context for error context
3773  * message.
3774  */
3776  originname);
3777  }
3778 
3779  /*
3780  * Setup callback for syscache so that we know when something changes in
3781  * the subscription relation state.
3782  */
3785  (Datum) 0);
3786 
3787  /* Build logical replication streaming options. */
3788  options.logical = true;
3789  options.startpoint = origin_startpos;
3790  options.slotname = myslotname;
3791 
3793  options.proto.logical.proto_version =
3797 
3798  options.proto.logical.publication_names = MySubscription->publications;
3799  options.proto.logical.binary = MySubscription->binary;
3800  options.proto.logical.streaming = MySubscription->stream;
3801  options.proto.logical.twophase = false;
3802  options.proto.logical.origin = pstrdup(MySubscription->origin);
3803 
3804  if (!am_tablesync_worker())
3805  {
3806  /*
3807  * Even when the two_phase mode is requested by the user, it remains
3808  * as the tri-state PENDING until all tablesyncs have reached READY
3809  * state. Only then can it become ENABLED.
3810  *
3811  * Note: If the subscription has no tables then leave the state as
3812  * PENDING, which allows ALTER SUBSCRIPTION ... REFRESH PUBLICATION to
3813  * work.
3814  */
3817  {
3818  /* Start streaming with two_phase enabled */
3819  options.proto.logical.twophase = true;
3821 
3826  }
3827  else
3828  {
3830  }
3831 
3832  ereport(DEBUG1,
3833  (errmsg_internal("logical replication apply worker for subscription \"%s\" two_phase is %s",
3838  "?")));
3839  }
3840  else
3841  {
3842  /* Start normal logical streaming replication. */
3844  }
3845 
3846  /* Run the main loop. */
3847  start_apply(origin_startpos);
3848 
3849  proc_exit(0);
3850 }
3851 
3852 /*
3853  * After error recovery, disable the subscription in a new transaction
3854  * and exit cleanly.
3855  */
3856 static void
3858 {
3859  /*
3860  * Emit the error message, and recover from the error state to an idle
3861  * state
3862  */
3863  HOLD_INTERRUPTS();
3864 
3865  EmitErrorReport();
3867  FlushErrorState();
3868 
3870 
3871  /* Report the worker failed during either table synchronization or apply */
3873  !am_tablesync_worker());
3874 
3875  /* Disable the subscription */
3879 
3880  /* Notify the subscription has been disabled and exit */
3881  ereport(LOG,
3882  errmsg("subscription \"%s\" has been disabled because of an error",
3883  MySubscription->name));
3884 
3885  proc_exit(0);
3886 }
3887 
3888 /*
3889  * Is current process a logical replication worker?
3890  */
3891 bool
3893 {
3894  return MyLogicalRepWorker != NULL;
3895 }
3896 
3897 /*
3898  * Start skipping changes of the transaction if the given LSN matches the
3899  * LSN specified by subscription's skiplsn.
3900  */
3901 static void
3903 {
3907 
3908  /*
3909  * Quick return if it's not requested to skip this transaction. This
3910  * function is called for every remote transaction and we assume that
3911  * skipping the transaction is not used often.
3912  */
3914  MySubscription->skiplsn != finish_lsn))
3915  return;
3916 
3917  /* Start skipping all changes of this transaction */
3918  skip_xact_finish_lsn = finish_lsn;
3919 
3920  ereport(LOG,
3921  errmsg("logical replication starts skipping transaction at LSN %X/%X",
3923 }
3924 
3925 /*
3926  * Stop skipping changes by resetting skip_xact_finish_lsn if enabled.
3927  */
3928 static void
3930 {
3931  if (!is_skipping_changes())
3932  return;
3933 
3934  ereport(LOG,
3935  (errmsg("logical replication completed skipping transaction at LSN %X/%X",
3937 
3938  /* Stop skipping changes */
3940 }
3941 
3942 /*
3943  * Clear subskiplsn of pg_subscription catalog.
3944  *
3945  * finish_lsn is the transaction's finish LSN, which is used to check whether
3946  * the subskiplsn matches it. If it does not match, we raise a warning when
3947  * clearing the subskiplsn, to alert users to cases such as mistakenly
3948  * specifying the wrong subskiplsn.
3949  */
3950 static void
3952 {
3953  Relation rel;
3954  Form_pg_subscription subform;
3955  HeapTuple tup;
3956  XLogRecPtr myskiplsn = MySubscription->skiplsn;
3957  bool started_tx = false;
3958 
3959  if (likely(XLogRecPtrIsInvalid(myskiplsn)))
3960  return;
3961 
3962  if (!IsTransactionState())
3963  {
3965  started_tx = true;
3966  }
3967 
3968  /*
3969  * Protect subskiplsn of pg_subscription from being concurrently updated
3970  * while clearing it.
3971  */
3972  LockSharedObject(SubscriptionRelationId, MySubscription->oid, 0,
3973  AccessShareLock);
3974 
3975  rel = table_open(SubscriptionRelationId, RowExclusiveLock);
3976 
3977  /* Fetch the existing tuple. */
3980 
3981  if (!HeapTupleIsValid(tup))
3982  elog(ERROR, "subscription \"%s\" does not exist", MySubscription->name);
3983 
3984  subform = (Form_pg_subscription) GETSTRUCT(tup);
3985 
3986  /*
3987  * Clear the subskiplsn. If the user has already changed subskiplsn before
3988  * clearing it we don't update the catalog and the replication origin
3989  * state won't get advanced. So in the worst case, if the server crashes
3990  * before sending an acknowledgment of the flush position the transaction
3991  * will be sent again and the user needs to set subskiplsn again. We can
3992  * reduce the possibility by logging a replication origin WAL record to
3993  * advance the origin LSN instead but there is no way to advance the
3994  * origin timestamp and it doesn't seem to be worth doing anything about
3995  * it since it's a very rare case.
3996  */
3997  if (subform->subskiplsn == myskiplsn)
3998  {
3999  bool nulls[Natts_pg_subscription];
4000  bool replaces[Natts_pg_subscription];
4001  Datum values[Natts_pg_subscription];
4002 
4003  memset(values, 0, sizeof(values));
4004  memset(nulls, false, sizeof(nulls));
4005  memset(replaces, false, sizeof(replaces));
4006 
4007  /* reset subskiplsn */
4008  values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
4009  replaces[Anum_pg_subscription_subskiplsn - 1] = true;
4010 
4011  tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
4012  replaces);
4013  CatalogTupleUpdate(rel, &tup->t_self, tup);
4014 
4015  if (myskiplsn != finish_lsn)
4016  ereport(WARNING,
4017  errmsg("skip-LSN of subscription \"%s\" cleared", MySubscription->name),
4018  errdetail("Remote transaction's finish WAL location (LSN) %X/%X did not match skip-LSN %X/%X.",
4019  LSN_FORMAT_ARGS(finish_lsn),
4020  LSN_FORMAT_ARGS(myskiplsn)));
4021  }
4022 
4023  heap_freetuple(tup);
4024  table_close(rel, NoLock);
4025 
4026  if (started_tx)
4028 }
4029 
4030 /* Error callback to give more context info about the change being applied */
4031 static void
4033 {
4035 
4037  return;
4038 
4039  Assert(errarg->origin_name);
4040 
4041  if (errarg->rel == NULL)
4042  {
4043  if (!TransactionIdIsValid(errarg->remote_xid))
4044  errcontext("processing remote data for replication origin \"%s\" during message type \"%s\"",
4045  errarg->origin_name,
4046  logicalrep_message_type(errarg->command));
4047  else if (XLogRecPtrIsInvalid(errarg->finish_lsn))
4048  errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u",
4049  errarg->origin_name,
4051  errarg->remote_xid);
4052  else
4053  errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%X",
4054  errarg->origin_name,
4056  errarg->remote_xid,
4057  LSN_FORMAT_ARGS(errarg->finish_lsn));
4058  }
4059  else if (errarg->remote_attnum < 0)
4060  errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%X",
4061  errarg->origin_name,
4063  errarg->rel->remoterel.nspname,
4064  errarg->rel->remoterel.relname,
4065  errarg->remote_xid,
4066  LSN_FORMAT_ARGS(errarg->finish_lsn));
4067  else
4068  errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%X",
4069  errarg->origin_name,
4071  errarg->rel->remoterel.nspname,
4072  errarg->rel->remoterel.relname,
4073  errarg->rel->remoterel.attnames[errarg->remote_attnum],
4074  errarg->remote_xid,
4075  LSN_FORMAT_ARGS(errarg->finish_lsn));
4076 }
4077 
4078 /* Set transaction information of apply error callback */
4079 static inline void
4081 {
4084 }
4085 
4086 /* Reset all information of apply error callback */
4087 static inline void
4089 {
4094 }
AclResult
Definition: acl.h:183
@ ACLCHECK_OK
Definition: acl.h:184
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3485
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4746
static void check_relation_updatable(LogicalRepRelMapEntry *rel)
Definition: worker.c:1764
static void subxact_filename(char *path, Oid subid, TransactionId xid)
Definition: worker.c:3359
static void begin_replication_step(void)
Definition: worker.c:423
static void end_replication_step(void)
Definition: worker.c:446
static void cleanup_subxact_info(void)
Definition: worker.c:3504
static void apply_handle_stream_prepare(StringInfo s)
Definition: worker.c:1098
static void apply_handle_insert_internal(ApplyExecutionData *edata, ResultRelInfo *relinfo, TupleTableSlot *remoteslot)
Definition: worker.c:1742
static void maybe_reread_subscription(void)
Definition: worker.c:3057
static void subxact_info_add(TransactionId xid)
Definition: worker.c:3281
static ApplyExecutionData * create_edata_for_relation(LogicalRepRelMapEntry *rel)
Definition: worker.c:500
static MemoryContext ApplyMessageContext
Definition: worker.c:245
static void stream_cleanup_files(Oid subid, TransactionId xid)
Definition: worker.c:3380
static bool should_apply_changes_for_rel(LogicalRepRelMapEntry *rel)
Definition: worker.c:405
static void apply_handle_type(StringInfo s)
Definition: worker.c:1604
static void apply_handle_truncate(StringInfo s)
Definition: worker.c:2388
static void apply_spooled_messages(TransactionId xid, XLogRecPtr lsn)
Definition: worker.c:1375
static void UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
Definition: worker.c:2693
static void apply_handle_update_internal(ApplyExecutionData *edata, ResultRelInfo *relinfo, TupleTableSlot *remoteslot, LogicalRepTupleData *newtup)
Definition: worker.c:1912
static void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid)
Definition: worker.c:3521
static void start_table_sync(XLogRecPtr *origin_startpos, char **myslotname)
Definition: worker.c:3542
static void subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
Definition: worker.c:3157
static bool handle_streamed_transaction(LogicalRepMsgType action, StringInfo s)
Definition: worker.c:462
struct ApplyExecutionData ApplyExecutionData
static void changes_filename(char *path, Oid subid, TransactionId xid)
Definition: worker.c:3366
static Oid GetRelationIdentityOrPK(Relation rel)
Definition: worker.c:1620
static BufFile * stream_fd
Definition: worker.c:280
static void apply_dispatch(StringInfo s)
Definition: worker.c:2510
static void apply_handle_update(StringInfo s)
Definition: worker.c:1805
static void apply_handle_stream_commit(StringInfo s)
Definition: worker.c:1494
static void stop_skipping_changes(void)
Definition: worker.c:3929
struct ApplySubXactData ApplySubXactData
#define NAPTIME_PER_CYCLE
Definition: worker.c:199
static void get_flush_position(XLogRecPtr *write, XLogRecPtr *flush, bool *have_pending_txes)
Definition: worker.c:2630
static void apply_handle_commit_prepared(StringInfo s)
Definition: worker.c:998
static void LogicalRepApplyLoop(XLogRecPtr last_received)
Definition: worker.c:2709
bool IsLogicalWorker(void)
Definition: worker.c:3892
static ApplySubXactData subxact_data
Definition: worker.c:298
static void DisableSubscriptionAndExit(void)
Definition: worker.c:3857
static void store_flush_position(XLogRecPtr remote_lsn)
Definition: worker.c:2674
static void apply_handle_tuple_routing(ApplyExecutionData *edata, TupleTableSlot *remoteslot, LogicalRepTupleData *newtup, CmdType operation)
Definition: worker.c:2140
static ApplyErrorCallbackArg apply_error_callback_arg
Definition: worker.c:235
static bool FindReplTupleInLocalRel(EState *estate, Relation localrel, LogicalRepRelation *remoterel, TupleTableSlot *remoteslot, TupleTableSlot **localslot)
Definition: worker.c:2105
bool in_remote_transaction
Definition: worker.c:256
static XLogRecPtr skip_xact_finish_lsn
Definition: worker.c:276
static void stream_open_file(Oid subid, TransactionId xid, bool first_segment)
Definition: worker.c:3407
static void apply_handle_delete(StringInfo s)
Definition: worker.c:1977
#define is_skipping_changes()
Definition: worker.c:277
static void stream_write_change(char action, StringInfo s)
Definition: worker.c:3477
static void clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
Definition: worker.c:3951
static void apply_handle_begin(StringInfo s)
Definition: worker.c:835
static dlist_head lsn_mapping
Definition: worker.c:208
static void apply_handle_delete_internal(ApplyExecutionData *edata, ResultRelInfo *relinfo, TupleTableSlot *remoteslot)
Definition: worker.c:2052
static void slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, LogicalRepTupleData *tupleData)
Definition: worker.c:641
void ReplicationOriginNameForLogicalRep(Oid suboid, Oid relid, char *originname, Size szoriginname)
Definition: worker.c:376
static void finish_edata(ApplyExecutionData *edata)
Definition: worker.c:553
static void slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot, LogicalRepRelMapEntry *rel, LogicalRepTupleData *tupleData)
Definition: worker.c:742
static void set_apply_error_context_xact(TransactionId xid, XLogRecPtr lsn)
Definition: worker.c:4080
static void start_apply(XLogRecPtr origin_startpos)
Definition: worker.c:3585
static void apply_handle_commit(StringInfo s)
Definition: worker.c:857
static void apply_handle_stream_abort(StringInfo s)
Definition: worker.c:1274
static void apply_handle_relation(StringInfo s)
Definition: worker.c:1581
struct ApplyErrorCallbackArg ApplyErrorCallbackArg
MemoryContext ApplyContext
Definition: worker.c:246
static void subxact_info_write(Oid subid, TransactionId xid)
Definition: worker.c:3172
static void TargetPrivilegesCheck(Relation rel, AclMode mode)
Definition: worker.c:1637
static void apply_handle_prepare(StringInfo s)
Definition: worker.c:943
static void apply_handle_rollback_prepared(StringInfo s)
Definition: worker.c:1041
static void apply_handle_stream_stop(StringInfo s)
Definition: worker.c:1241
static void apply_handle_origin(StringInfo s)
Definition: worker.c:1156
static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
Definition: worker.c:2968
WalReceiverConn * LogRepWorkerWalRcvConn
Definition: worker.c:251
static XLogRecPtr remote_final_lsn
Definition: worker.c:257
static bool MySubscriptionValid
Definition: worker.c:254
static MemoryContext LogicalStreamingContext
Definition: worker.c:249
static void apply_handle_commit_internal(LogicalRepCommitData *commit_data)
Definition: worker.c:1528
static bool in_streamed_transaction
Definition: worker.c:260
struct SubXactInfo SubXactInfo
static void apply_handle_begin_prepare(StringInfo s)
Definition: worker.c:883
struct FlushPosition FlushPosition
void ApplyWorkerMain(Datum main_arg)
Definition: worker.c:3613
static void apply_handle_stream_start(StringInfo s)
Definition: worker.c:1174
static void maybe_start_skipping_changes(XLogRecPtr finish_lsn)
Definition: worker.c:3902
static void apply_error_callback(void *arg)
Definition: worker.c:4032
Subscription * MySubscription
Definition: worker.c:253
static void apply_handle_prepare_internal(LogicalRepPreparedTxnData *prepare_data)
Definition: worker.c:909
static void stream_close_file(void)
Definition: worker.c:3456
static TransactionId stream_xid
Definition: worker.c:262
static void apply_handle_insert(StringInfo s)
Definition: worker.c:1669
static void slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate, TupleTableSlot *slot)
Definition: worker.c:584
static void subxact_info_read(Oid subid, TransactionId xid)
Definition: worker.c:3221
static void reset_apply_error_context_info(void)
Definition: worker.c:4088
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1719
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1573
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1537
void pgstat_report_activity(BackendState state, const char *cmd_str)
@ STATE_IDLE
@ STATE_RUNNING
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:739
static Datum values[MAXATTR]
Definition: bootstrap.c:156
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:582
BufFile * BufFileOpenFileSet(FileSet *fileset, const char *name, int mode, bool missing_ok)
Definition: buffile.c:286
void BufFileTell(BufFile *file, int *fileno, off_t *offset)
Definition: buffile.c:782
void BufFileTruncateFileSet(BufFile *file, int fileno, off_t offset)
Definition: buffile.c:900
void BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:625
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:689
BufFile * BufFileCreateFileSet(FileSet *fileset, const char *name)
Definition: buffile.c:262
void BufFileClose(BufFile *file)
Definition: buffile.c:407
void BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
Definition: buffile.c:359
unsigned int uint32
Definition: c.h:442
#define likely(x)
Definition: c.h:294
uint32 TransactionId
Definition: c.h:588
#define OidIsValid(objectId)
Definition: c.h:711
size_t Size
Definition: c.h:541
int64 TimestampTz
Definition: timestamp.h:39
void load_file(const char *filename, bool restricted)
Definition: dfmgr.c:144
int my_log2(long num)
Definition: dynahash.c:1760
int errmsg_internal(const char *fmt,...)
Definition: elog.c:993
void EmitErrorReport(void)
Definition: elog.c:1506
int errcode_for_file_access(void)
Definition: elog.c:718
int errdetail(const char *fmt,...)
Definition: elog.c:1039
ErrorContextCallback * error_context_stack
Definition: elog.c:94
void FlushErrorState(void)
Definition: elog.c:1651
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define LOG
Definition: elog.h:27
#define PG_RE_THROW()
Definition: elog.h:350
#define errcontext
Definition: elog.h:192
#define PG_TRY(...)
Definition: elog.h:309
#define WARNING
Definition: elog.h:32
#define DEBUG2
Definition: elog.h:25
#define PG_END_TRY(...)
Definition: elog.h:334
#define DEBUG1
Definition: elog.h:26
#define ERROR
Definition: elog.h:35
#define PG_CATCH(...)
Definition: elog.h:319
#define ereport(elevel,...)
Definition: elog.h:145
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:225
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:124
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
Definition: execIndexing.c:231
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
Definition: execIndexing.c:156
bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool emitError)
Definition: execMain.c:1786
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition: execMain.c:1196
void EvalPlanQualEnd(EPQState *epqstate)
Definition: execMain.c:2939
void EvalPlanQualInit(EPQState *epqstate, EState *parentestate, Plan *subplan, List *auxrowmarks, int epqParam)
Definition: execMain.c:2518
ResultRelInfo * ExecFindPartition(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate)
PartitionTupleRouting * ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute)
bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot)
bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot)
void ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, EState *estate, EPQState *epqstate, TupleTableSlot *searchslot)
void CheckSubscriptionRelkind(char relkind, const char *nspname, const char *relname)
void ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, EState *estate, EPQState *epqstate, TupleTableSlot *searchslot, TupleTableSlot *slot)
void ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo, EState *estate, TupleTableSlot *slot)
void ExecResetTupleTable(List *tupleTable, bool shouldFree)
Definition: execTuples.c:1191
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:83
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1552
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1831
void ExecInitRangeTable(EState *estate, List *rangeTable)
Definition: execUtils.c:753
EState * CreateExecutorState(void)
Definition: execUtils.c:90
TupleConversionMap * ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
Definition: execUtils.c:1262
void FreeExecutorState(EState *estate)
Definition: execUtils.c:188
#define GetPerTupleExprContext(estate)
Definition: executor.h:535
#define GetPerTupleMemoryContext(estate)
Definition: executor.h:540
#define EvalPlanQualSetSlot(epqstate, slot)
Definition: executor.h:229
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:318
void FileSetInit(FileSet *fileset)
Definition: fileset.c:54
Datum OidReceiveFunctionCall(Oid functionId, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1648
Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod)
Definition: fmgr.c:1630
struct Latch * MyLatch
Definition: globals.c:58
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4158
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_SUSET
Definition: guc.h:74
@ PGC_SIGHUP
Definition: guc.h:71
@ PGC_BACKEND
Definition: guc.h:73
void ProcessConfigFile(GucContext context)
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *replValues, bool *replIsnull, bool *doReplace)
Definition: heaptuple.c:1113
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:649
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:325
static void dlist_delete(dlist_node *node)
Definition: ilist.h:394
#define dlist_tail_element(type, membername, lhead)
Definition: ilist.h:562
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:590
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition: ilist.h:353
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:281
#define dlist_container(type, membername, ptr)
Definition: ilist.h:543
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
Definition: indexing.c:301
#define write(a, b, c)
Definition: win32.h:14
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:61
void AcceptInvalidationMessages(void)
Definition: inval.c:746
void CacheRegisterSyscacheCallback(int cacheid, SyscacheCallbackFunction func, Datum arg)
Definition: inval.c:1519
void proc_exit(int code)
Definition: ipc.c:104
int i
Definition: isn.c:73
int WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock, long timeout, uint32 wait_event_info)
Definition: latch.c:524
void ResetLatch(Latch *latch)
Definition: latch.c:683
#define WL_SOCKET_READABLE
Definition: latch.h:126
#define WL_TIMEOUT
Definition: latch.h:128
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
#define WL_LATCH_SET
Definition: latch.h:125
void logicalrep_worker_attach(int slot)
Definition: launcher.c:565
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:59
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
List * lappend_oid(List *list, Oid datum)
Definition: list.c:374
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:721
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1046
int LOCKMODE
Definition: lockdefs.h:26
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define AccessShareLock
Definition: lockdefs.h:36
#define RowExclusiveLock
Definition: lockdefs.h:38
@ LockTupleExclusive
Definition: lockoptions.h:58
#define LOGICALREP_PROTO_STREAM_VERSION_NUM
Definition: logicalproto.h:38
#define LOGICALREP_PROTO_TWOPHASE_VERSION_NUM
Definition: logicalproto.h:39
#define LOGICALREP_COLUMN_UNCHANGED
Definition: logicalproto.h:92
LogicalRepMsgType
Definition: logicalproto.h:53
@ LOGICAL_REP_MSG_INSERT
Definition: logicalproto.h:57
@ LOGICAL_REP_MSG_TRUNCATE
Definition: logicalproto.h:60
@ LOGICAL_REP_MSG_STREAM_STOP
Definition: logicalproto.h:69
@ LOGICAL_REP_MSG_BEGIN
Definition: logicalproto.h:54
@ LOGICAL_REP_MSG_STREAM_PREPARE
Definition: logicalproto.h:72
@ LOGICAL_REP_MSG_STREAM_ABORT
Definition: logicalproto.h:71
@ LOGICAL_REP_MSG_BEGIN_PREPARE
Definition: logicalproto.h:64
@ LOGICAL_REP_MSG_STREAM_START
Definition: logicalproto.h:68
@ LOGICAL_REP_MSG_COMMIT
Definition: logicalproto.h:55
@ LOGICAL_REP_MSG_PREPARE
Definition: logicalproto.h:65
@ LOGICAL_REP_MSG_RELATION
Definition: logicalproto.h:61
@ LOGICAL_REP_MSG_MESSAGE
Definition: logicalproto.h:63
@ LOGICAL_REP_MSG_ROLLBACK_PREPARED
Definition: logicalproto.h:67
@ LOGICAL_REP_MSG_COMMIT_PREPARED
Definition: logicalproto.h:66
@ LOGICAL_REP_MSG_TYPE
Definition: logicalproto.h:62
@ LOGICAL_REP_MSG_DELETE
Definition: logicalproto.h:59
@ LOGICAL_REP_MSG_STREAM_COMMIT
Definition: logicalproto.h:70
@ LOGICAL_REP_MSG_ORIGIN
Definition: logicalproto.h:56
@ LOGICAL_REP_MSG_UPDATE
Definition: logicalproto.h:58
uint32 LogicalRepRelId
Definition: logicalproto.h:96
#define LOGICALREP_PROTO_VERSION_NUM
Definition: logicalproto.h:37
#define LOGICALREP_COLUMN_BINARY
Definition: logicalproto.h:94
#define LOGICALREP_COLUMN_TEXT
Definition: logicalproto.h:93
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3331
char * get_rel_name(Oid relid)
Definition: lsyscache.c:1910
void getTypeInputInfo(Oid type, Oid *typInput, Oid *typIOParam)
Definition: lsyscache.c:2832
void getTypeBinaryInputInfo(Oid type, Oid *typReceive, Oid *typIOParam)
Definition: lsyscache.c:2898
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:303
MemoryContext TopTransactionContext
Definition: mcxt.c:135
char * pstrdup(const char *in)
Definition: mcxt.c:1483
void pfree(void *pointer)
Definition: mcxt.c:1306
MemoryContext TopMemoryContext
Definition: mcxt.c:130
void * palloc0(Size size)
Definition: mcxt.c:1230
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1470
void * palloc(Size size)
Definition: mcxt.c:1199
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
#define MemoryContextResetAndDeleteChildren(ctx)
Definition: memutils.h:70
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:134
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:132
char * GetUserNameFromId(Oid roleid, bool noerr)
Definition: miscinit.c:967
Oid GetUserId(void)
Definition: miscinit.c:497
CmdType
Definition: nodes.h:263
@ CMD_INSERT
Definition: nodes.h:267
@ CMD_DELETE
Definition: nodes.h:268
@ CMD_UPDATE
Definition: nodes.h:266
#define makeNode(_type_)
Definition: nodes.h:165
ObjectType get_relkind_objtype(char relkind)
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:158
RepOriginId replorigin_by_name(const char *roname, bool missing_ok)
Definition: origin.c:221
RepOriginId replorigin_create(const char *roname)
Definition: origin.c:252
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:1083
RepOriginId replorigin_session_origin
Definition: origin.c:156
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1218
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:157
#define InvalidRepOriginId
Definition: origin.h:33
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:135
#define ACL_DELETE
Definition: parsenodes.h:86
uint64 AclMode
Definition: parsenodes.h:81
#define ACL_INSERT
Definition: parsenodes.h:83
#define ACL_UPDATE
Definition: parsenodes.h:85
@ RTE_RELATION
Definition: parsenodes.h:1011
@ DROP_RESTRICT
Definition: parsenodes.h:1935
#define ACL_SELECT
Definition: parsenodes.h:84
#define ACL_TRUNCATE
Definition: parsenodes.h:87
int16 attnum
Definition: pg_attribute.h:83
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:207
void * arg
static PgChecksumMode mode
Definition: pg_checksums.c:65
#define NAMEDATALEN
#define MAXPGPATH
const void size_t len
static int server_version
Definition: pg_dumpall.c:110
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
Definition: pg_inherits.c:256
#define lfirst(lc)
Definition: pg_list.h:170
#define NIL
Definition: pg_list.h:66
#define list_make1(x1)
Definition: pg_list.h:210
static void * list_nth(const List *list, int n)
Definition: pg_list.h:297
#define lfirst_oid(lc)
Definition: pg_list.h:172
static Datum LSNGetDatum(XLogRecPtr X)
Definition: pg_lsn.h:28
static char ** options
void FreeSubscription(Subscription *sub)
void DisableSubscription(Oid subid)
Subscription * GetSubscription(Oid subid, bool missing_ok)
#define LOGICALREP_TWOPHASE_STATE_DISABLED
#define LOGICALREP_TWOPHASE_STATE_PENDING
#define LOGICALREP_TWOPHASE_STATE_ENABLED
FormData_pg_subscription * Form_pg_subscription
#define die(msg)
Definition: pg_test_fsync.c:95
static char * buf
Definition: pg_test_fsync.c:67
long pgstat_report_stat(bool force)
Definition: pgstat.c:565
void pgstat_report_subscription_error(Oid subid, bool is_apply_error)
int64 timestamp
Expr * expression_planner(Expr *expr)
Definition: planner.c:6147
pqsigfunc pqsignal(int signo, pqsigfunc func)
int pgsocket
Definition: port.h:29
#define snprintf
Definition: port.h:238
#define PGINVALID_SOCKET
Definition: port.h:31
uintptr_t Datum
Definition: postgres.h:412
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:600
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:550
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5601
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition: postmaster.c:5568
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:401
int64 pq_getmsgint64(StringInfo msg)
Definition: pqformat.c:455
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
static void pq_sendint64(StringInfo buf, uint64 i)
Definition: pqformat.h:153
char * c
static int fd(const char *x, int i)
Definition: preproc-init.c:105
char * s2
void logicalrep_read_commit(StringInfo in, LogicalRepCommitData *commit_data)
Definition: proto.c:109
LogicalRepRelId logicalrep_read_delete(StringInfo in, LogicalRepTupleData *oldtup)
Definition: proto.c:564
void logicalrep_read_rollback_prepared(StringInfo in, LogicalRepRollbackPreparedTxnData *rollback_data)
Definition: proto.c:336
void logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_data)
Definition: proto.c:145
List * logicalrep_read_truncate(StringInfo in, bool *cascade, bool *restart_seqs)
Definition: proto.c:618
char * logicalrep_message_type(LogicalRepMsgType action)
Definition: proto.c:1198
void logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
Definition: proto.c:756
LogicalRepRelId logicalrep_read_update(StringInfo in, bool *has_oldtuple, LogicalRepTupleData *oldtup, LogicalRepTupleData *newtup)
Definition: proto.c:492
void logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
Definition: proto.c:74
void logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData *prepare_data)
Definition: proto.c:278
LogicalRepRelation * logicalrep_read_rel(StringInfo in)
Definition: proto.c:700
void logicalrep_read_stream_abort(StringInfo in, TransactionId *xid, TransactionId *subxid)
Definition: proto.c:1185
void logicalrep_read_stream_prepare(StringInfo in, LogicalRepPreparedTxnData *prepare_data)
Definition: proto.c:376
TransactionId logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data)
Definition: proto.c:1143
LogicalRepRelId logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
Definition: proto.c:436
void logicalrep_read_prepare(StringInfo in, LogicalRepPreparedTxnData *prepare_data)
Definition: proto.c:239
TransactionId logicalrep_read_stream_start(StringInfo in, bool *first_segment)
Definition: proto.c:1093
static color newsub(struct colormap *cm, color co)
Definition: regc_color.c:389
#define RelationGetRelid(relation)
Definition: rel.h:501
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:699
#define RelationGetDescr(relation)
Definition: rel.h:527
#define RelationGetRelationName(relation)
Definition: rel.h:535
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:656
#define RelationGetNamespace(relation)
Definition: rel.h:542
Oid RelationGetPrimaryKeyIndex(Relation relation)
Definition: relcache.c:4926
Oid RelationGetReplicaIndex(Relation relation)
Definition: relcache.c:4947
void fill_extraUpdatedCols(RangeTblEntry *target_rte, Relation target_relation)
Node * build_column_default(Relation rel, int attrno)
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:52
@ RLS_ENABLED
Definition: rls.h:45
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:251
void PushActiveSnapshot(Snapshot snapshot)
Definition: snapmgr.c:683
void PopActiveSnapshot(void)
Definition: snapmgr.c:778
void logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
Definition: relation.c:523
LogicalRepRelMapEntry * logicalrep_partition_open(LogicalRepRelMapEntry *root, Relation partrel, AttrMap *map)
Definition: relation.c:585
void logicalrep_relmap_update(LogicalRepRelation *remoterel)
Definition: relation.c:157
LogicalRepRelMapEntry * logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
Definition: relation.c:319
void logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
Definition: relation.c:456
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
TransactionId remote_xid
Definition: worker.c:230
LogicalRepMsgType command
Definition: worker.c:225
XLogRecPtr finish_lsn
Definition: worker.c:231
LogicalRepRelMapEntry * rel
Definition: worker.c:226
ResultRelInfo * targetRelInfo
Definition: worker.c:215
EState * estate
Definition: worker.c:212
PartitionTupleRouting * proute
Definition: worker.c:219
ModifyTableState * mtstate
Definition: worker.c:218
LogicalRepRelMapEntry * targetRel
Definition: worker.c:214
uint32 nsubxacts
Definition: worker.c:292
uint32 nsubxacts_max
Definition: worker.c:293
SubXactInfo * subxacts
Definition: worker.c:295
TransactionId subxact_last
Definition: worker.c:294
Definition: attmap.h:35
int maplen
Definition: attmap.h:37
AttrNumber * attnums
Definition: attmap.h:36
List * es_range_table
Definition: execnodes.h:614
List * es_tupleTable
Definition: execnodes.h:657
List * es_opened_result_relations
Definition: execnodes.h:633
CommandId es_output_cid
Definition: execnodes.h:627
struct ErrorContextCallback * previous
Definition: elog.h:234
void(* callback)(void *arg)
Definition: elog.h:235
dlist_node node
Definition: worker.c:203
XLogRecPtr remote_end
Definition: worker.c:205
XLogRecPtr local_end
Definition: worker.c:204
ItemPointerData t_self
Definition: htup.h:65
Definition: pg_list.h:52
XLogRecPtr final_lsn
Definition: logicalproto.h:124
TransactionId xid
Definition: logicalproto.h:126
TimestampTz committime
Definition: logicalproto.h:133
LogicalRepRelation remoterel
StringInfoData * colvalues
Definition: logicalproto.h:82
TimestampTz last_recv_time
TimestampTz reply_time
FileSet * stream_fileset
XLogRecPtr reply_lsn
XLogRecPtr last_lsn
TimestampTz last_send_time
CmdType operation
Definition: execnodes.h:1257
ResultRelInfo * resultRelInfo
Definition: execnodes.h:1261
PlanState ps
Definition: execnodes.h:1256
Plan * plan
Definition: execnodes.h:1029
EState * state
Definition: execnodes.h:1031
Bitmapset * updatedCols
Definition: parsenodes.h:1184
RTEKind rtekind
Definition: parsenodes.h:1030
Form_pg_class rd_rel
Definition: rel.h:110
TupleTableSlot * ri_PartitionTupleSlot
Definition: execnodes.h:568
Relation ri_RelationDesc
Definition: execnodes.h:448
off_t offset
Definition: worker.c:286
TransactionId xid
Definition: worker.c:284
int fileno
Definition: worker.c:285
XLogRecPtr skiplsn
AttrMap * attrMap
Definition: tupconvert.h:28
TupleDesc tts_tupleDescriptor
Definition: tuptable.h:124
bool * tts_isnull
Definition: tuptable.h:128
Datum * tts_values
Definition: tuptable.h:126
dlist_node * cur
Definition: ilist.h:200
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:27
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:179
@ SUBSCRIPTIONRELMAP
Definition: syscache.h:100
@ SUBSCRIPTIONOID
Definition: syscache.h:99
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91
void ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, DropBehavior behavior, bool restart_seqs)
Definition: tablecmds.c:1789
bool AllTablesyncsReady(void)
Definition: tablesync.c:1541
void invalidate_syncing_table_states(Datum arg, int cacheid, uint32 hashvalue)
Definition: tablesync.c:271
void process_syncing_tables(XLogRecPtr current_lsn)
Definition: tablesync.c:631
char * LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
Definition: tablesync.c:1205
void UpdateTwoPhaseState(Oid suboid, char new_state)
Definition: tablesync.c:1566
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void AfterTriggerEndQuery(EState *estate)
Definition: trigger.c:4974
void AfterTriggerBeginQuery(void)
Definition: trigger.c:4954
TupleTableSlot * execute_attr_map_slot(AttrMap *attrMap, TupleTableSlot *in_slot, TupleTableSlot *out_slot)
Definition: tupconvert.c:192
TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc)
Definition: tupconvert.c:102
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433
static TupleTableSlot * ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot)
Definition: tuptable.h:483
static void slot_getallattrs(TupleTableSlot *slot)
Definition: tuptable.h:362
bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, TimestampTz origin_prepare_timestamp)
Definition: twophase.c:2577
void FinishPreparedTransaction(const char *gid, bool isCommit)
Definition: twophase.c:1480
#define TimestampTzPlusMilliseconds(tz, ms)
Definition: timestamp.h:84
@ WAIT_EVENT_LOGICAL_APPLY_MAIN
Definition: wait_event.h:43
static StringInfoData reply_message
Definition: walreceiver.c:134
int wal_receiver_status_interval
Definition: walreceiver.c:90
int wal_receiver_timeout
Definition: walreceiver.c:91
#define walrcv_startstreaming(conn, options)
Definition: walreceiver.h:422
#define walrcv_connect(conninfo, logical, appname, err)
Definition: walreceiver.h:408
#define walrcv_send(conn, buffer, nbytes)
Definition: walreceiver.h:428
#define walrcv_server_version(conn)
Definition: walreceiver.h:418
#define walrcv_endstreaming(conn, next_tli)
Definition: walreceiver.h:424
#define walrcv_identify_system(conn, primary_tli)
Definition: walreceiver.h:416
#define walrcv_receive(conn, buffer, wait_fd)
Definition: walreceiver.h:426
int WalWriterDelay
Definition: walwriter.c:70
#define SIGHUP
Definition: win32_port.h:176
static bool am_tablesync_worker(void)
bool PrepareTransactionBlock(const char *gid)
Definition: xact.c:3818
bool IsTransactionState(void)
Definition: xact.c:377
void CommandCounterIncrement(void)
Definition: xact.c:1077
void StartTransactionCommand(void)
Definition: xact.c:2925
void SetCurrentStatementStartTimestamp(void)
Definition: xact.c:898
void BeginTransactionBlock(void)
Definition: xact.c:3750
void CommitTransactionCommand(void)
Definition: xact.c:3022
void AbortOutOfAnyTransaction(void)
Definition: xact.c:4692
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:817
#define GIDSIZE
Definition: xact.h:31
XLogRecPtr GetFlushRecPtr(TimeLineID *insertTLI)
Definition: xlog.c:6077
XLogRecPtr XactLastCommitEnd
Definition: xlog.c:258
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59