PostgreSQL Source Code  git master
pgoutput.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pgoutput.c
4  * Logical Replication output plugin
5  *
6  * Copyright (c) 2012-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/pgoutput/pgoutput.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/tupconvert.h"
16 #include "catalog/partition.h"
17 #include "catalog/pg_publication.h"
18 #include "commands/defrem.h"
19 #include "fmgr.h"
20 #include "replication/logical.h"
22 #include "replication/origin.h"
23 #include "replication/pgoutput.h"
24 #include "utils/int8.h"
25 #include "utils/inval.h"
26 #include "utils/lsyscache.h"
27 #include "utils/memutils.h"
28 #include "utils/syscache.h"
29 #include "utils/varlena.h"
30 
32 
34 
36  OutputPluginOptions *opt, bool is_init);
39  ReorderBufferTXN *txn);
41  ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
43  ReorderBufferTXN *txn, Relation rel,
44  ReorderBufferChange *change);
46  ReorderBufferTXN *txn, int nrelations, Relation relations[],
47  ReorderBufferChange *change);
49  ReorderBufferTXN *txn, XLogRecPtr message_lsn,
50  bool transactional, const char *prefix,
51  Size sz, const char *message);
53  RepOriginId origin_id);
54 static void pgoutput_stream_start(struct LogicalDecodingContext *ctx,
55  ReorderBufferTXN *txn);
56 static void pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
57  ReorderBufferTXN *txn);
58 static void pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
59  ReorderBufferTXN *txn,
60  XLogRecPtr abort_lsn);
61 static void pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
62  ReorderBufferTXN *txn,
63  XLogRecPtr commit_lsn);
64 
65 static bool publications_valid;
66 static bool in_streaming;
67 
68 static List *LoadPublications(List *pubnames);
69 static void publication_invalidation_cb(Datum arg, int cacheid,
70  uint32 hashvalue);
71 static void send_relation_and_attrs(Relation relation, TransactionId xid,
73 
74 /*
75  * Entry in the map used to remember which relation schemas we sent.
76  *
77  * The schema_sent flag determines if the current schema record was already
78  * sent to the subscriber (in which case we don't need to send it again).
79  *
80  * The schema cache on downstream is however updated only at commit time,
81  * and with streamed transactions the commit order may be different from
82  * the order the transactions are sent in. Also, the (sub) transactions
83  * might get aborted so we need to send the schema for each (sub) transaction
84  * so that we don't lose the schema information on abort. For handling this,
85  * we maintain the list of xids (streamed_txns) for which we have already sent
86  * the schema.
87  *
88  * For partitions, 'pubactions' considers not only the table's own
89  * publications, but also those of all of its ancestors.
90  */
91 typedef struct RelationSyncEntry
92 {
93  Oid relid; /* relation oid */
94 
95  /*
96  * Did we send the schema? If ancestor relid is set, its schema must also
97  * have been sent for this to be true.
98  */
100  List *streamed_txns; /* streamed toplevel transactions with this
101  * schema */
102 
105 
106  /*
107  * OID of the relation to publish changes as. For a partition, this may
108  * be set to one of its ancestors whose schema will be used when
109  * replicating changes, if publish_via_partition_root is set for the
110  * publication.
111  */
113 
114  /*
115  * Map used when replicating using an ancestor's schema to convert tuples
116  * from partition's type to the ancestor's; NULL if publish_as_relid is
117  * same as 'relid' or if unnecessary due to partition and the ancestor
118  * having identical TupleDesc.
119  */
122 
123 /* Map used to remember which relation schemas we sent. */
124 static HTAB *RelationSyncCache = NULL;
125 
126 static void init_rel_sync_cache(MemoryContext decoding_context);
127 static void cleanup_rel_sync_cache(TransactionId xid, bool is_commit);
130 static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
131  uint32 hashvalue);
133  TransactionId xid);
135  TransactionId xid);
136 
137 /*
138  * Specify output plugin callbacks
139  */
140 void
142 {
144 
153 
154  /* transaction streaming */
162 }
163 
164 static void
166 {
167  ListCell *lc;
168  bool protocol_version_given = false;
169  bool publication_names_given = false;
170  bool binary_option_given = false;
171  bool messages_option_given = false;
172  bool streaming_given = false;
173 
174  data->binary = false;
175  data->streaming = false;
176  data->messages = false;
177 
178  foreach(lc, options)
179  {
180  DefElem *defel = (DefElem *) lfirst(lc);
181 
182  Assert(defel->arg == NULL || IsA(defel->arg, String));
183 
184  /* Check each param, whether or not we recognize it */
185  if (strcmp(defel->defname, "proto_version") == 0)
186  {
187  int64 parsed;
188 
189  if (protocol_version_given)
190  ereport(ERROR,
191  (errcode(ERRCODE_SYNTAX_ERROR),
192  errmsg("conflicting or redundant options")));
193  protocol_version_given = true;
194 
195  if (!scanint8(strVal(defel->arg), true, &parsed))
196  ereport(ERROR,
197  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
198  errmsg("invalid proto_version")));
199 
200  if (parsed > PG_UINT32_MAX || parsed < 0)
201  ereport(ERROR,
202  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
203  errmsg("proto_version \"%s\" out of range",
204  strVal(defel->arg))));
205 
206  data->protocol_version = (uint32) parsed;
207  }
208  else if (strcmp(defel->defname, "publication_names") == 0)
209  {
210  if (publication_names_given)
211  ereport(ERROR,
212  (errcode(ERRCODE_SYNTAX_ERROR),
213  errmsg("conflicting or redundant options")));
214  publication_names_given = true;
215 
216  if (!SplitIdentifierString(strVal(defel->arg), ',',
217  &data->publication_names))
218  ereport(ERROR,
219  (errcode(ERRCODE_INVALID_NAME),
220  errmsg("invalid publication_names syntax")));
221  }
222  else if (strcmp(defel->defname, "binary") == 0)
223  {
224  if (binary_option_given)
225  ereport(ERROR,
226  (errcode(ERRCODE_SYNTAX_ERROR),
227  errmsg("conflicting or redundant options")));
228  binary_option_given = true;
229 
230  data->binary = defGetBoolean(defel);
231  }
232  else if (strcmp(defel->defname, "messages") == 0)
233  {
234  if (messages_option_given)
235  ereport(ERROR,
236  (errcode(ERRCODE_SYNTAX_ERROR),
237  errmsg("conflicting or redundant options")));
238  messages_option_given = true;
239 
240  data->messages = defGetBoolean(defel);
241  }
242  else if (strcmp(defel->defname, "streaming") == 0)
243  {
244  if (streaming_given)
245  ereport(ERROR,
246  (errcode(ERRCODE_SYNTAX_ERROR),
247  errmsg("conflicting or redundant options")));
248  streaming_given = true;
249 
250  data->streaming = defGetBoolean(defel);
251  }
252  else
253  elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
254  }
255 }
256 
257 /*
258  * Initialize this plugin
259  */
260 static void
262  bool is_init)
263 {
264  PGOutputData *data = palloc0(sizeof(PGOutputData));
265 
266  /* Create our memory context for private allocations. */
268  "logical replication output context",
270 
271  ctx->output_plugin_private = data;
272 
273  /* This plugin uses binary protocol. */
275 
276  /*
277  * This is replication start and not slot initialization.
278  *
279  * Parse and validate options passed by the client.
280  */
281  if (!is_init)
282  {
283  /* Parse the params and ERROR if we see any we don't recognize */
285 
286  /* Check if we support requested protocol */
288  ereport(ERROR,
289  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
290  errmsg("client sent proto_version=%d but we only support protocol %d or lower",
292 
294  ereport(ERROR,
295  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
296  errmsg("client sent proto_version=%d but we only support protocol %d or higher",
298 
299  if (list_length(data->publication_names) < 1)
300  ereport(ERROR,
301  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
302  errmsg("publication_names parameter missing")));
303 
304  /*
305  * Decide whether to enable streaming. It is disabled by default, in
306  * which case we just update the flag in decoding context. Otherwise
307  * we only allow it with sufficient version of the protocol, and when
308  * the output plugin supports it.
309  */
310  if (!data->streaming)
311  ctx->streaming = false;
313  ereport(ERROR,
314  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
315  errmsg("requested proto_version=%d does not support streaming, need %d or higher",
317  else if (!ctx->streaming)
318  ereport(ERROR,
319  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
320  errmsg("streaming requested, but not supported by output plugin")));
321 
322  /* Also remember we're currently not streaming any transaction. */
323  in_streaming = false;
324 
325  /* Init publication state. */
326  data->publications = NIL;
327  publications_valid = false;
330  (Datum) 0);
331 
332  /* Initialize relation schema cache. */
334  }
335  else
336  {
337  /* Disable the streaming during the slot initialization mode. */
338  ctx->streaming = false;
339  }
340 }
341 
342 /*
343  * BEGIN callback
344  */
345 static void
347 {
348  bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
349 
350  OutputPluginPrepareWrite(ctx, !send_replication_origin);
351  logicalrep_write_begin(ctx->out, txn);
352 
353  if (send_replication_origin)
354  {
355  char *origin;
356 
357  /*----------
358  * XXX: which behaviour do we want here?
359  *
360  * Alternatives:
361  * - don't send origin message if origin name not found
362  * (that's what we do now)
363  * - throw error - that will break replication, not good
364  * - send some special "unknown" origin
365  *----------
366  */
367  if (replorigin_by_oid(txn->origin_id, true, &origin))
368  {
369  /* Message boundary */
370  OutputPluginWrite(ctx, false);
371  OutputPluginPrepareWrite(ctx, true);
372  logicalrep_write_origin(ctx->out, origin, txn->origin_lsn);
373  }
374 
375  }
376 
377  OutputPluginWrite(ctx, true);
378 }
379 
380 /*
381  * COMMIT callback
382  */
383 static void
385  XLogRecPtr commit_lsn)
386 {
388 
389  OutputPluginPrepareWrite(ctx, true);
390  logicalrep_write_commit(ctx->out, txn, commit_lsn);
391  OutputPluginWrite(ctx, true);
392 }
393 
394 /*
395  * Write the current schema of the relation and its ancestor (if any) if not
396  * done yet.
397  */
398 static void
401  Relation relation, RelationSyncEntry *relentry)
402 {
403  bool schema_sent;
406 
407  /*
408  * Remember XID of the (sub)transaction for the change. We don't care if
409  * it's top-level transaction or not (we have already sent that XID in
410  * start of the current streaming block).
411  *
412  * If we're not in a streaming block, just use InvalidTransactionId and
413  * the write methods will not include it.
414  */
415  if (in_streaming)
416  xid = change->txn->xid;
417 
418  if (change->txn->toptxn)
419  topxid = change->txn->toptxn->xid;
420  else
421  topxid = xid;
422 
423  /*
424  * Do we need to send the schema? We do track streamed transactions
425  * separately, because those may be applied later (and the regular
426  * transactions won't see their effects until then) and in an order that
427  * we don't know at this point.
428  *
429  * XXX There is scope for optimization here. Currently, we always send
430  * the schema the first time in a streaming transaction, but we can probably
431  * avoid that by checking 'relentry->schema_sent' flag. However, before
432  * doing that we need to study its impact on the case where we have a mix
433  * of streaming and non-streaming transactions.
434  */
435  if (in_streaming)
436  schema_sent = get_schema_sent_in_streamed_txn(relentry, topxid);
437  else
438  schema_sent = relentry->schema_sent;
439 
440  if (schema_sent)
441  return;
442 
443  /* If needed, send the ancestor's schema first. */
444  if (relentry->publish_as_relid != RelationGetRelid(relation))
445  {
446  Relation ancestor = RelationIdGetRelation(relentry->publish_as_relid);
447  TupleDesc indesc = RelationGetDescr(relation);
448  TupleDesc outdesc = RelationGetDescr(ancestor);
449  MemoryContext oldctx;
450 
451  /* Map must live as long as the session does. */
453  relentry->map = convert_tuples_by_name(CreateTupleDescCopy(indesc),
454  CreateTupleDescCopy(outdesc));
455  MemoryContextSwitchTo(oldctx);
456  send_relation_and_attrs(ancestor, xid, ctx);
457  RelationClose(ancestor);
458  }
459 
460  send_relation_and_attrs(relation, xid, ctx);
461 
462  if (in_streaming)
463  set_schema_sent_in_streamed_txn(relentry, topxid);
464  else
465  relentry->schema_sent = true;
466 }
467 
468 /*
469  * Sends a relation
470  */
471 static void
474 {
475  TupleDesc desc = RelationGetDescr(relation);
476  int i;
477 
478  /*
479  * Write out type info if needed. We do that only for user-created types.
480  * We use FirstGenbkiObjectId as the cutoff, so that we only consider
481  * objects with hand-assigned OIDs to be "built in", not for instance any
482  * function or type defined in the information_schema. This is important
483  * because only hand-assigned OIDs can be expected to remain stable across
484  * major versions.
485  */
486  for (i = 0; i < desc->natts; i++)
487  {
488  Form_pg_attribute att = TupleDescAttr(desc, i);
489 
490  if (att->attisdropped || att->attgenerated)
491  continue;
492 
493  if (att->atttypid < FirstGenbkiObjectId)
494  continue;
495 
496  OutputPluginPrepareWrite(ctx, false);
497  logicalrep_write_typ(ctx->out, xid, att->atttypid);
498  OutputPluginWrite(ctx, false);
499  }
500 
501  OutputPluginPrepareWrite(ctx, false);
502  logicalrep_write_rel(ctx->out, xid, relation);
503  OutputPluginWrite(ctx, false);
504 }
505 
506 /*
507  * Sends the decoded DML over wire.
508  *
509  * This is called both in streaming and non-streaming modes.
510  */
511 static void
513  Relation relation, ReorderBufferChange *change)
514 {
516  MemoryContext old;
517  RelationSyncEntry *relentry;
519  Relation ancestor = NULL;
520 
521  if (!is_publishable_relation(relation))
522  return;
523 
524  /*
525  * Remember the xid for the change in streaming mode. We need to send xid
526  * with each change in the streaming mode so that subscriber can make
527  * their association and on aborts, it can discard the corresponding
528  * changes.
529  */
530  if (in_streaming)
531  xid = change->txn->xid;
532 
533  relentry = get_rel_sync_entry(data, RelationGetRelid(relation));
534 
535  /* First check the table filter */
536  switch (change->action)
537  {
539  if (!relentry->pubactions.pubinsert)
540  return;
541  break;
543  if (!relentry->pubactions.pubupdate)
544  return;
545  break;
547  if (!relentry->pubactions.pubdelete)
548  return;
549  break;
550  default:
551  Assert(false);
552  }
553 
554  /* Avoid leaking memory by using and resetting our own context */
555  old = MemoryContextSwitchTo(data->context);
556 
557  maybe_send_schema(ctx, txn, change, relation, relentry);
558 
559  /* Send the data */
560  switch (change->action)
561  {
563  {
564  HeapTuple tuple = &change->data.tp.newtuple->tuple;
565 
566  /* Switch relation if publishing via root. */
567  if (relentry->publish_as_relid != RelationGetRelid(relation))
568  {
569  Assert(relation->rd_rel->relispartition);
570  ancestor = RelationIdGetRelation(relentry->publish_as_relid);
571  relation = ancestor;
572  /* Convert tuple if needed. */
573  if (relentry->map)
574  tuple = execute_attr_map_tuple(tuple, relentry->map);
575  }
576 
577  OutputPluginPrepareWrite(ctx, true);
578  logicalrep_write_insert(ctx->out, xid, relation, tuple,
579  data->binary);
580  OutputPluginWrite(ctx, true);
581  break;
582  }
584  {
585  HeapTuple oldtuple = change->data.tp.oldtuple ?
586  &change->data.tp.oldtuple->tuple : NULL;
587  HeapTuple newtuple = &change->data.tp.newtuple->tuple;
588 
589  /* Switch relation if publishing via root. */
590  if (relentry->publish_as_relid != RelationGetRelid(relation))
591  {
592  Assert(relation->rd_rel->relispartition);
593  ancestor = RelationIdGetRelation(relentry->publish_as_relid);
594  relation = ancestor;
595  /* Convert tuples if needed. */
596  if (relentry->map)
597  {
598  oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
599  newtuple = execute_attr_map_tuple(newtuple, relentry->map);
600  }
601  }
602 
603  OutputPluginPrepareWrite(ctx, true);
604  logicalrep_write_update(ctx->out, xid, relation, oldtuple,
605  newtuple, data->binary);
606  OutputPluginWrite(ctx, true);
607  break;
608  }
610  if (change->data.tp.oldtuple)
611  {
612  HeapTuple oldtuple = &change->data.tp.oldtuple->tuple;
613 
614  /* Switch relation if publishing via root. */
615  if (relentry->publish_as_relid != RelationGetRelid(relation))
616  {
617  Assert(relation->rd_rel->relispartition);
618  ancestor = RelationIdGetRelation(relentry->publish_as_relid);
619  relation = ancestor;
620  /* Convert tuple if needed. */
621  if (relentry->map)
622  oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
623  }
624 
625  OutputPluginPrepareWrite(ctx, true);
626  logicalrep_write_delete(ctx->out, xid, relation, oldtuple,
627  data->binary);
628  OutputPluginWrite(ctx, true);
629  }
630  else
631  elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
632  break;
633  default:
634  Assert(false);
635  }
636 
637  if (RelationIsValid(ancestor))
638  {
639  RelationClose(ancestor);
640  ancestor = NULL;
641  }
642 
643  /* Cleanup */
646 }
647 
648 static void
650  int nrelations, Relation relations[], ReorderBufferChange *change)
651 {
653  MemoryContext old;
654  RelationSyncEntry *relentry;
655  int i;
656  int nrelids;
657  Oid *relids;
659 
660  /* Remember the xid for the change in streaming mode. See pgoutput_change. */
661  if (in_streaming)
662  xid = change->txn->xid;
663 
664  old = MemoryContextSwitchTo(data->context);
665 
666  relids = palloc0(nrelations * sizeof(Oid));
667  nrelids = 0;
668 
669  for (i = 0; i < nrelations; i++)
670  {
671  Relation relation = relations[i];
672  Oid relid = RelationGetRelid(relation);
673 
674  if (!is_publishable_relation(relation))
675  continue;
676 
677  relentry = get_rel_sync_entry(data, relid);
678 
679  if (!relentry->pubactions.pubtruncate)
680  continue;
681 
682  /*
683  * Don't send partitions if the publication wants to send only the
684  * root tables through it.
685  */
686  if (relation->rd_rel->relispartition &&
687  relentry->publish_as_relid != relid)
688  continue;
689 
690  relids[nrelids++] = relid;
691  maybe_send_schema(ctx, txn, change, relation, relentry);
692  }
693 
694  if (nrelids > 0)
695  {
696  OutputPluginPrepareWrite(ctx, true);
698  xid,
699  nrelids,
700  relids,
701  change->data.truncate.cascade,
702  change->data.truncate.restart_seqs);
703  OutputPluginWrite(ctx, true);
704  }
705 
708 }
709 
710 static void
712  XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz,
713  const char *message)
714 {
717 
718  if (!data->messages)
719  return;
720 
721  /*
722  * Remember the xid for the message in streaming mode. See
723  * pgoutput_change.
724  */
725  if (in_streaming)
726  xid = txn->xid;
727 
728  OutputPluginPrepareWrite(ctx, true);
730  xid,
731  message_lsn,
732  transactional,
733  prefix,
734  sz,
735  message);
736  OutputPluginWrite(ctx, true);
737 }
738 
739 /*
740  * Currently we always forward.
741  */
742 static bool
744  RepOriginId origin_id)
745 {
746  return false;
747 }
748 
749 /*
750  * Shutdown the output plugin.
751  *
752  * Note, we don't need to clean the data->context as it is a child context
753  * of ctx->context, so it will be cleaned up by the logical decoding machinery.
754  */
755 static void
757 {
758  if (RelationSyncCache)
759  {
760  hash_destroy(RelationSyncCache);
761  RelationSyncCache = NULL;
762  }
763 }
764 
765 /*
766  * Load publications from the list of publication names.
767  */
768 static List *
770 {
771  List *result = NIL;
772  ListCell *lc;
773 
774  foreach(lc, pubnames)
775  {
776  char *pubname = (char *) lfirst(lc);
777  Publication *pub = GetPublicationByName(pubname, false);
778 
779  result = lappend(result, pub);
780  }
781 
782  return result;
783 }
784 
785 /*
786  * Publication cache invalidation callback.
 *
 * Marks the cached publication list as stale by clearing publications_valid,
 * so that it is reloaded the next time a relation sync cache entry is
 * validated. The callback arguments (arg, cacheid, hashvalue) are not
 * examined here; they are passed through unchanged to
 * rel_sync_cache_publication_cb().
787  */
788 static void
789 publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
790 {
791  publications_valid = false;
792 
793  /*
794  * Also invalidate per-relation cache so that next time the filtering info
795  * is checked it will be updated with the new publication settings.
796  */
797  rel_sync_cache_publication_cb(arg, cacheid, hashvalue);
798 }
799 
800 /*
801  * START STREAM callback
802  */
803 static void
805  ReorderBufferTXN *txn)
806 {
807  bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
808 
809  /* we can't nest streaming of transactions */
811 
812  /*
813  * If we already sent the first stream for this transaction then don't
814  * send the origin id in the subsequent streams.
815  */
816  if (rbtxn_is_streamed(txn))
817  send_replication_origin = false;
818 
819  OutputPluginPrepareWrite(ctx, !send_replication_origin);
821 
822  if (send_replication_origin)
823  {
824  char *origin;
825 
826  if (replorigin_by_oid(txn->origin_id, true, &origin))
827  {
828  /* Message boundary */
829  OutputPluginWrite(ctx, false);
830  OutputPluginPrepareWrite(ctx, true);
832  }
833  }
834 
835  OutputPluginWrite(ctx, true);
836 
837  /* we're streaming a chunk of transaction now */
838  in_streaming = true;
839 }
840 
841 /*
842  * STOP STREAM callback
843  */
844 static void
846  ReorderBufferTXN *txn)
847 {
848  /* we should be streaming a transaction */
850 
851  OutputPluginPrepareWrite(ctx, true);
853  OutputPluginWrite(ctx, true);
854 
855  /* we've stopped streaming a transaction */
856  in_streaming = false;
857 }
858 
859 /*
860  * Notify downstream to discard the streamed transaction (along with all
861  * its subtransactions, if it's a toplevel transaction).
862  */
863 static void
865  ReorderBufferTXN *txn,
866  XLogRecPtr abort_lsn)
867 {
868  ReorderBufferTXN *toptxn;
869 
870  /*
871  * The abort should happen outside streaming block, even for streamed
872  * transactions. The transaction has to be marked as streamed, though.
873  */
875 
876  /* determine the toplevel transaction */
877  toptxn = (txn->toptxn) ? txn->toptxn : txn;
878 
879  Assert(rbtxn_is_streamed(toptxn));
880 
881  OutputPluginPrepareWrite(ctx, true);
882  logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid);
883  OutputPluginWrite(ctx, true);
884 
885  cleanup_rel_sync_cache(toptxn->xid, false);
886 }
887 
888 /*
889  * Notify downstream to apply the streamed transaction (along with all
890  * its subtransactions).
891  */
892 static void
894  ReorderBufferTXN *txn,
895  XLogRecPtr commit_lsn)
896 {
897  /*
898  * The commit should happen outside streaming block, even for streamed
899  * transactions. The transaction has to be marked as streamed, though.
900  */
903 
905 
906  OutputPluginPrepareWrite(ctx, true);
907  logicalrep_write_stream_commit(ctx->out, txn, commit_lsn);
908  OutputPluginWrite(ctx, true);
909 
910  cleanup_rel_sync_cache(txn->xid, true);
911 }
912 
913 /*
914  * Initialize the relation schema sync cache for a decoding session.
915  *
916  * The hash table is destroyed at the end of a decoding session. While
917  * relcache invalidations still exist and will still be invoked, they
918  * will just see the null hash table global and take no action.
919  */
920 static void
922 {
923  HASHCTL ctl;
924 
925  if (RelationSyncCache != NULL)
926  return;
927 
928  /* Make a new hash table for the cache */
929  ctl.keysize = sizeof(Oid);
930  ctl.entrysize = sizeof(RelationSyncEntry);
931  ctl.hcxt = cachectx;
932 
933  RelationSyncCache = hash_create("logical replication output relation cache",
934  128, &ctl,
936 
937  Assert(RelationSyncCache != NULL);
938 
942  (Datum) 0);
943 }
944 
945 /*
946  * We expect a relatively small number of streamed transactions.
947  */
948 static bool
950 {
951  ListCell *lc;
952 
953  foreach(lc, entry->streamed_txns)
954  {
955  if (xid == (uint32) lfirst_int(lc))
956  return true;
957  }
958 
959  return false;
960 }
961 
962 /*
963  * Add the xid in the rel sync entry for which we have already sent the schema
964  * of the relation.
965  */
966 static void
968 {
969  MemoryContext oldctx;
970 
972 
973  entry->streamed_txns = lappend_int(entry->streamed_txns, xid);
974 
975  MemoryContextSwitchTo(oldctx);
976 }
977 
978 /*
979  * Find or create entry in the relation schema cache.
980  *
981  * This looks up publications that the given relation is directly or
982  * indirectly part of (the latter if it's really the relation's ancestor that
983  * is part of a publication) and fills up the found entry with the information
984  * about which operations to publish and whether to use an ancestor's schema
985  * when publishing.
986  */
987 static RelationSyncEntry *
989 {
990  RelationSyncEntry *entry;
991  bool am_partition = get_rel_relispartition(relid);
992  char relkind = get_rel_relkind(relid);
993  bool found;
994  MemoryContext oldctx;
995 
996  Assert(RelationSyncCache != NULL);
997 
998  /* Find cached relation info, creating if not found */
999  entry = (RelationSyncEntry *) hash_search(RelationSyncCache,
1000  (void *) &relid,
1001  HASH_ENTER, &found);
1002  Assert(entry != NULL);
1003 
1004  /* Not found means schema wasn't sent */
1005  if (!found)
1006  {
1007  /* immediately make a new entry valid enough to satisfy callbacks */
1008  entry->schema_sent = false;
1009  entry->streamed_txns = NIL;
1010  entry->replicate_valid = false;
1011  entry->pubactions.pubinsert = entry->pubactions.pubupdate =
1012  entry->pubactions.pubdelete = entry->pubactions.pubtruncate = false;
1013  entry->publish_as_relid = InvalidOid;
1014  }
1015 
1016  /* Validate the entry */
1017  if (!entry->replicate_valid)
1018  {
1019  List *pubids = GetRelationPublications(relid);
1020  ListCell *lc;
1022 
1023  /* Reload publications if needed before use. */
1024  if (!publications_valid)
1025  {
1027  if (data->publications)
1029 
1031  MemoryContextSwitchTo(oldctx);
1032  publications_valid = true;
1033  }
1034 
1035  /*
1036  * Build publication cache. We can't use one provided by relcache as
1037  * relcache considers all publications given relation is in, but here
1038  * we only need to consider ones that the subscriber requested.
1039  */
1040  foreach(lc, data->publications)
1041  {
1042  Publication *pub = lfirst(lc);
1043  bool publish = false;
1044 
1045  if (pub->alltables)
1046  {
1047  publish = true;
1048  if (pub->pubviaroot && am_partition)
1049  publish_as_relid = llast_oid(get_partition_ancestors(relid));
1050  }
1051 
1052  if (!publish)
1053  {
1054  bool ancestor_published = false;
1055 
1056  /*
1057  * For a partition, check if any of the ancestors are
1058  * published. If so, note down the topmost ancestor that is
1059  * published via this publication, which will be used as the
1060  * relation via which to publish the partition's changes.
1061  */
1062  if (am_partition)
1063  {
1064  List *ancestors = get_partition_ancestors(relid);
1065  ListCell *lc2;
1066 
1067  /*
1068  * Find the "topmost" ancestor that is in this
1069  * publication.
1070  */
1071  foreach(lc2, ancestors)
1072  {
1073  Oid ancestor = lfirst_oid(lc2);
1074 
1076  pub->oid))
1077  {
1078  ancestor_published = true;
1079  if (pub->pubviaroot)
1080  publish_as_relid = ancestor;
1081  }
1082  }
1083  }
1084 
1085  if (list_member_oid(pubids, pub->oid) || ancestor_published)
1086  publish = true;
1087  }
1088 
1089  /*
1090  * Don't publish changes for partitioned tables, because
1091  * publishing those of its partitions suffices, unless partition
1092  * changes won't be published due to pubviaroot being set.
1093  */
1094  if (publish &&
1095  (relkind != RELKIND_PARTITIONED_TABLE || pub->pubviaroot))
1096  {
1097  entry->pubactions.pubinsert |= pub->pubactions.pubinsert;
1098  entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
1099  entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
1101  }
1102 
1103  if (entry->pubactions.pubinsert && entry->pubactions.pubupdate &&
1104  entry->pubactions.pubdelete && entry->pubactions.pubtruncate)
1105  break;
1106  }
1107 
1108  list_free(pubids);
1109 
1111  entry->replicate_valid = true;
1112  }
1113 
1114  return entry;
1115 }
1116 
1117 /*
1118  * Cleanup list of streamed transactions and update the schema_sent flag.
1119  *
1120  * When a streamed transaction commits or aborts, we need to remove the
1121  * toplevel XID from the schema cache. If the transaction aborted, the
1122  * subscriber will simply throw away the schema records we streamed, so
1123  * we don't need to do anything else.
1124  *
1125  * If the transaction is committed, the subscriber will update the relation
1126  * cache - so tweak the schema_sent flag accordingly.
1127  */
1128 static void
1130 {
1131  HASH_SEQ_STATUS hash_seq;
1132  RelationSyncEntry *entry;
1133  ListCell *lc;
1134 
1135  Assert(RelationSyncCache != NULL);
1136 
1137  hash_seq_init(&hash_seq, RelationSyncCache);
1138  while ((entry = hash_seq_search(&hash_seq)) != NULL)
1139  {
1140  /*
1141  * We can set the schema_sent flag for an entry that has committed xid
1142  * in the list as that ensures that the subscriber would have the
1143  * corresponding schema and we don't need to send it unless there is
1144  * any invalidation for that relation.
1145  */
1146  foreach(lc, entry->streamed_txns)
1147  {
1148  if (xid == (uint32) lfirst_int(lc))
1149  {
1150  if (is_commit)
1151  entry->schema_sent = true;
1152 
1153  entry->streamed_txns =
1155  break;
1156  }
1157  }
1158  }
1159 }
1160 
1161 /*
1162  * Relcache invalidation callback
1163  */
1164 static void
1166 {
1167  RelationSyncEntry *entry;
1168 
1169  /*
1170  * We can get here if the plugin was used in the SQL interface, as the
1171  * RelationSyncCache is destroyed when the decoding finishes, but there
1172  * is no way to unregister the relcache invalidation callback.
1173  */
1174  if (RelationSyncCache == NULL)
1175  return;
1176 
1177  /*
1178  * Nobody keeps pointers to entries in this hash table around outside
1179  * logical decoding callback calls - but invalidation events can come in
1180  * *during* a callback if we access the relcache in the callback. Because
1181  * of that we must mark the cache entry as invalid but not remove it from
1182  * the hash while it could still be referenced, then prune it at a later
1183  * safe point.
1184  *
1185  * Getting invalidations for relations that aren't in the table is
1186  * entirely normal, since there's no way to unregister for an invalidation
1187  * event. So we don't care if it's found or not.
1188  */
1189  entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid,
1190  HASH_FIND, NULL);
1191 
1192  /*
1193  * Reset schema sent status as the relation definition may have changed.
1194  */
1195  if (entry != NULL)
1196  {
1197  entry->schema_sent = false;
1198  list_free(entry->streamed_txns);
1199  entry->streamed_txns = NIL;
1200  }
1201 }
1202 
1203 /*
1204  * Publication relation map syscache invalidation callback
1205  */
1206 static void
1208 {
1210  RelationSyncEntry *entry;
1211 
1212  /*
1213  * We can get here if the plugin was used in the SQL interface, as the
1214  * RelationSyncCache is destroyed when the decoding finishes, but there
1215  * is no way to unregister the syscache invalidation callback.
1216  */
1217  if (RelationSyncCache == NULL)
1218  return;
1219 
1220  /*
1221  * There is no way to find which entry in our cache the hash belongs to so
1222  * mark the whole cache as invalid.
1223  */
1224  hash_seq_init(&status, RelationSyncCache);
1225  while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
1226  {
1227  entry->replicate_valid = false;
1228 
1229  /*
1230  * There might be some relations dropped from the publication so we
1231  * don't need to publish the changes for them.
1232  */
1233  entry->pubactions.pubinsert = false;
1234  entry->pubactions.pubupdate = false;
1235  entry->pubactions.pubdelete = false;
1236  entry->pubactions.pubtruncate = false;
1237  }
1238 }
List * streamed_txns
Definition: pgoutput.c:100
LogicalDecodeTruncateCB truncate_cb
#define NIL
Definition: pg_list.h:65
static void pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, Relation rel, ReorderBufferChange *change)
Definition: pgoutput.c:512
PublicationActions pubactions
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:862
TupleDesc CreateTupleDescCopy(TupleDesc tupdesc)
Definition: tupdesc.c:111
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
#define AllocSetContextCreate
Definition: memutils.h:173
#define DEBUG1
Definition: elog.h:25
static bool publications_valid
Definition: pgoutput.c:65
RepOriginId origin_id
static bool in_streaming
Definition: pgoutput.c:66
void logicalrep_write_truncate(StringInfo out, TransactionId xid, int nrelids, Oid relids[], bool cascade, bool restart_seqs)
Definition: proto.c:312
bool replicate_valid
Definition: pgoutput.c:103
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
uint32 TransactionId
Definition: c.h:587
static void publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
Definition: pgoutput.c:789
void logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn, bool transactional, const char *prefix, Size sz, const char *message)
Definition: proto.c:369
MemoryContext hcxt
Definition: hsearch.h:86
#define RelationGetDescr(relation)
Definition: rel.h:495
void _PG_output_plugin_init(OutputPluginCallbacks *cb)
Definition: pgoutput.c:141
static void parse_output_parameters(List *options, PGOutputData *data)
Definition: pgoutput.c:165
void logicalrep_write_stream_stop(StringInfo out)
Definition: proto.c:817
static void maybe_send_schema(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, ReorderBufferChange *change, Relation relation, RelationSyncEntry *relentry)
Definition: pgoutput.c:399
MemoryContext context
Definition: pgoutput.h:20
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
static void rel_sync_cache_relation_cb(Datum arg, Oid relid)
Definition: pgoutput.c:1165
char get_rel_relkind(Oid relid)
Definition: lsyscache.c:1974
static void pgoutput_stream_start(struct LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
Definition: pgoutput.c:804
TupleConversionMap * map
Definition: pgoutput.c:120
static void pgoutput_shutdown(LogicalDecodingContext *ctx)
Definition: pgoutput.c:756
struct ReorderBufferTXN * txn
Definition: reorderbuffer.h:87
static void pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr commit_lsn)
Definition: pgoutput.c:384
LogicalDecodeMessageCB message_cb
LogicalDecodeStreamMessageCB stream_message_cb
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
uint16 RepOriginId
Definition: xlogdefs.h:65
Size entrysize
Definition: hsearch.h:76
#define strVal(v)
Definition: value.h:54
void logicalrep_write_stream_abort(StringInfo out, TransactionId xid, TransactionId subxid)
Definition: proto.c:877
int errcode(int sqlerrcode)
Definition: elog.c:698
static void send_relation_and_attrs(Relation relation, TransactionId xid, LogicalDecodingContext *ctx)
Definition: pgoutput.c:472
void * output_plugin_private
Definition: logical.h:75
void logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn, XLogRecPtr commit_lsn)
Definition: proto.c:75
MemoryContext context
Definition: logical.h:35
static void pgoutput_message(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz, const char *message)
Definition: pgoutput.c:711
void logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel, HeapTuple oldtuple, bool binary)
Definition: proto.c:260
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:143
#define llast_oid(l)
Definition: pg_list.h:196
LogicalDecodeStreamAbortCB stream_abort_cb
List * output_plugin_options
Definition: logical.h:58
void logicalrep_write_origin(StringInfo out, const char *origin, XLogRecPtr origin_lsn)
Definition: proto.c:113
#define PG_UINT32_MAX
Definition: c.h:525
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:449
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
struct RelationSyncEntry RelationSyncEntry
static void rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
Definition: pgoutput.c:1207
enum ReorderBufferChangeType action
Definition: reorderbuffer.h:84
void list_free_deep(List *list)
Definition: list.c:1405
void logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel, HeapTuple newtuple, bool binary)
Definition: proto.c:142
Publication * GetPublicationByName(const char *pubname, bool missing_ok)
XLogRecPtr origin_lsn
void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func, Datum arg)
Definition: inval.c:1477
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:369
void logicalrep_write_stream_start(StringInfo out, TransactionId xid, bool first_segment)
Definition: proto.c:783
OutputPluginOutputType output_type
Definition: output_plugin.h:28
#define rbtxn_is_streamed(txn)
Definition: dynahash.c:219
bool defGetBoolean(DefElem *def)
Definition: define.c:111
List * GetRelationPublications(Oid relid)
static void pgoutput_stream_commit(struct LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr commit_lsn)
Definition: pgoutput.c:893
void logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel)
Definition: proto.c:396
#define ERROR
Definition: elog.h:46
#define RelationIsValid(relation)
Definition: rel.h:442
#define lfirst_int(lc)
Definition: pg_list.h:170
LogicalDecodeCommitCB commit_cb
void(* LogicalOutputPluginInit)(struct OutputPluginCallbacks *cb)
Definition: output_plugin.h:36
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
#define LOGICALREP_PROTO_MIN_VERSION_NUM
Definition: logicalproto.h:31
TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc)
Definition: tupconvert.c:102
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3753
bool messages
Definition: pgoutput.h:29
bool is_publishable_relation(Relation rel)
static void pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
Definition: pgoutput.c:261
List * publication_names
Definition: pgoutput.h:25
#define InvalidTransactionId
Definition: transam.h:31
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:349
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:203
unsigned int uint32
Definition: c.h:441
#define LOGICALREP_PROTO_MAX_VERSION_NUM
Definition: logicalproto.h:34
struct ReorderBufferTXN * toptxn
void RelationClose(Relation relation)
Definition: relcache.c:2098
HeapTuple execute_attr_map_tuple(HeapTuple tuple, TupleConversionMap *map)
Definition: tupconvert.c:139
void logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn, XLogRecPtr commit_lsn)
Definition: proto.c:826
union ReorderBufferChange::@97 data
static RelationSyncEntry * get_rel_sync_entry(PGOutputData *data, Oid relid)
Definition: pgoutput.c:988
List * lappend_int(List *list, int datum)
Definition: list.c:354
bool get_rel_relispartition(Oid relid)
Definition: lsyscache.c:1998
Node * arg
Definition: parsenodes.h:747
List * lappend(List *list, void *datum)
Definition: list.c:336
void logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
Definition: proto.c:46
static HTAB * RelationSyncCache
Definition: pgoutput.c:124
static bool get_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
Definition: pgoutput.c:949
#define HASH_BLOBS
Definition: hsearch.h:97
void OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
Definition: logical.c:655
void CacheRegisterSyscacheCallback(int cacheid, SyscacheCallbackFunction func, Datum arg)
Definition: inval.c:1435
void * palloc0(Size size)
Definition: mcxt.c:1093
LogicalDecodeChangeCB change_cb
uintptr_t Datum
Definition: postgres.h:411
LogicalDecodeStreamTruncateCB stream_truncate_cb
TransactionId xid
Size keysize
Definition: hsearch.h:75
PG_MODULE_MAGIC
Definition: pgoutput.c:31
#define InvalidOid
Definition: postgres_ext.h:36
#define ereport(elevel,...)
Definition: elog.h:157
#define LOGICALREP_PROTO_STREAM_VERSION_NUM
Definition: logicalproto.h:33
static void set_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
Definition: pgoutput.c:967
void OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
Definition: logical.c:629
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:689
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
static bool pgoutput_origin_filter(LogicalDecodingContext *ctx, RepOriginId origin_id)
Definition: pgoutput.c:743
size_t Size
Definition: c.h:540
bool binary
Definition: pgoutput.h:27
static int list_length(const List *l)
Definition: pg_list.h:149
LogicalDecodeShutdownCB shutdown_cb
LogicalDecodeStreamCommitCB stream_commit_cb
static List * LoadPublications(List *pubnames)
Definition: pgoutput.c:769
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1436
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1426
LogicalDecodeStartupCB startup_cb
#define InvalidRepOriginId
Definition: origin.h:33
List * publications
Definition: pgoutput.h:26
struct ReorderBufferChange::@97::@98 tp
LogicalDecodeStreamStartCB stream_start_cb
struct ReorderBufferChange::@97::@99 truncate
static void init_rel_sync_cache(MemoryContext decoding_context)
Definition: pgoutput.c:921
#define FirstGenbkiObjectId
Definition: transam.h:188
static void cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
Definition: pgoutput.c:1129
int errmsg(const char *fmt,...)
Definition: elog.c:909
static void pgoutput_stream_stop(struct LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
Definition: pgoutput.c:845
void OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
Definition: logical.c:642
void list_free(List *list)
Definition: list.c:1391
#define elog(elevel,...)
Definition: elog.h:232
StringInfo out
Definition: logical.h:70
int i
void logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid)
Definition: proto.c:450
void * arg
LogicalDecodeBeginCB begin_cb
char * defname
Definition: parsenodes.h:746
void logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel, HeapTuple oldtuple, HeapTuple newtuple, bool binary)
Definition: proto.c:186
LogicalDecodeStreamStopCB stream_stop_cb
static void pgoutput_truncate(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, int nrelations, Relation relations[], ReorderBufferChange *change)
Definition: pgoutput.c:649
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
static void pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
Definition: pgoutput.c:346
bool streaming
Definition: pgoutput.h:28
LogicalDecodeFilterByOriginCB filter_by_origin_cb
Definition: pg_list.h:50
LogicalDecodeStreamChangeCB stream_change_cb
List * get_partition_ancestors(Oid relid)
Definition: partition.c:133
#define RelationGetRelid(relation)
Definition: rel.h:469
Relation RelationIdGetRelation(Oid relationId)
Definition: relcache.c:1992
#define lfirst_oid(lc)
Definition: pg_list.h:171
static void pgoutput_stream_abort(struct LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr abort_lsn)
Definition: pgoutput.c:864
MemoryContext CacheMemoryContext
Definition: mcxt.c:51
PublicationActions pubactions
Definition: pgoutput.c:104
bool scanint8(const char *str, bool errorOK, int64 *result)
Definition: int8.c:55
#define AssertVariableIsOfType(varname, typename)
Definition: c.h:963
uint32 protocol_version
Definition: pgoutput.h:24